Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-03-09 12:50:23 -05:00)

Commit 1f7974690e: Merge remote-tracking branch 'origin' into yt-live-from-start-range

98 changed files with 7110 additions and 3283 deletions
.github/ISSUE_TEMPLATE/1_broken_site.yml (vendored, 6 changes)

@@ -1,5 +1,5 @@
-name: Broken site
+name: Broken site support
-description: Report error in a supported site
+description: Report issue with yt-dlp on a supported site
 labels: [triage, site-bug]
 body:
   - type: checkboxes
@@ -16,7 +16,7 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm reporting that a **supported** site is broken
+        - label: I'm reporting that yt-dlp is broken on a **supported** site
          required: true
        - label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
          required: true
.github/ISSUE_TEMPLATE/4_bug_report.yml (vendored, 2 changes)

@@ -1,4 +1,4 @@
-name: Bug report
+name: Core bug report
 description: Report a bug unrelated to any particular site or extractor
 labels: [triage, bug]
 body:
.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml (vendored)

@@ -1,5 +1,5 @@
-name: Broken site
+name: Broken site support
-description: Report error in a supported site
+description: Report issue with yt-dlp on a supported site
 labels: [triage, site-bug]
 body:
   %(no_skip)s
@@ -10,7 +10,7 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm reporting that a **supported** site is broken
+        - label: I'm reporting that yt-dlp is broken on a **supported** site
          required: true
        - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
          required: true
.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml (vendored, 2 changes)

@@ -1,4 +1,4 @@
-name: Bug report
+name: Core bug report
 description: Report a bug unrelated to any particular site or extractor
 labels: [triage, bug]
 body:
.github/PULL_REQUEST_TEMPLATE.md (vendored, 6 changes)

@@ -40,4 +40,10 @@ ### What is the purpose of your *pull request*?
 - [ ] Core bug fix/improvement
 - [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes))

+<!-- Do NOT edit/remove anything below this! -->
+</details><details><summary>Copilot Summary</summary>
+
+copilot:all
+
 </details>
.github/workflows/build.yml (vendored, 90 changes)

@@ -41,7 +41,7 @@ on:
         required: true
         type: string
       channel:
-        description: Update channel (stable/nightly)
+        description: Update channel (stable/nightly/...)
         required: true
         default: stable
         type: string
@@ -127,6 +127,19 @@ jobs:
           mv ./dist/yt-dlp_linux ./yt-dlp_linux
           mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip

+      - name: Verify --update-to
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          binaries=("yt-dlp" "yt-dlp_linux")
+          for binary in "${binaries[@]}"; do
+            chmod +x ./${binary}
+            cp ./${binary} ./${binary}_downgraded
+            version="$(./${binary} --version)"
+            ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+            downgraded_version="$(./${binary}_downgraded --version)"
+            [[ "$version" != "$downgraded_version" ]]
+          done
+
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
@@ -176,6 +189,16 @@ jobs:
           python3.8 devscripts/make_lazy_extractors.py
           python3.8 pyinst.py

+          if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
+            arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
+            chmod +x ./dist/yt-dlp_linux_${arch}
+            cp ./dist/yt-dlp_linux_${arch} ./dist/yt-dlp_linux_${arch}_downgraded
+            version="$(./dist/yt-dlp_linux_${arch} --version)"
+            ./dist/yt-dlp_linux_${arch}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+            downgraded_version="$(./dist/yt-dlp_linux_${arch}_downgraded --version)"
+            [[ "$version" != "$downgraded_version" ]]
+          fi
+
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
@@ -188,21 +211,33 @@ jobs:
     steps:
       - uses: actions/checkout@v3
-      # NB: In order to create a universal2 application, the version of python3 in /usr/bin has to be used
+      # NB: Building universal2 does not work with python from actions/setup-python
       - name: Install Requirements
         run: |
           brew install coreutils
-          /usr/bin/python3 -m pip install -U --user pip Pyinstaller==5.8 -r requirements.txt
+          python3 -m pip install -U --user pip setuptools wheel
+          # We need to ignore wheels otherwise we break universal2 builds
+          python3 -m pip install -U --user --no-binary :all: Pyinstaller -r requirements.txt

       - name: Prepare
         run: |
-          /usr/bin/python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
-          /usr/bin/python3 devscripts/make_lazy_extractors.py
+          python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
+          python3 devscripts/make_lazy_extractors.py
       - name: Build
         run: |
-          /usr/bin/python3 pyinst.py --target-architecture universal2 --onedir
+          python3 pyinst.py --target-architecture universal2 --onedir
           (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
-          /usr/bin/python3 pyinst.py --target-architecture universal2
+          python3 pyinst.py --target-architecture universal2

+      - name: Verify --update-to
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          chmod +x ./dist/yt-dlp_macos
+          cp ./dist/yt-dlp_macos ./dist/yt-dlp_macos_downgraded
+          version="$(./dist/yt-dlp_macos --version)"
+          ./dist/yt-dlp_macos_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(./dist/yt-dlp_macos_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
+
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
@@ -232,7 +267,8 @@ jobs:
       - name: Install Requirements
         run: |
           brew install coreutils
-          python3 -m pip install -U --user pip Pyinstaller -r requirements.txt
+          python3 -m pip install -U --user pip setuptools wheel
+          python3 -m pip install -U --user Pyinstaller -r requirements.txt

       - name: Prepare
         run: |
@@ -243,6 +279,16 @@ jobs:
           python3 pyinst.py
           mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy

+      - name: Verify --update-to
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          chmod +x ./dist/yt-dlp_macos_legacy
+          cp ./dist/yt-dlp_macos_legacy ./dist/yt-dlp_macos_legacy_downgraded
+          version="$(./dist/yt-dlp_macos_legacy --version)"
+          ./dist/yt-dlp_macos_legacy_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(./dist/yt-dlp_macos_legacy_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
+
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
@@ -275,6 +321,19 @@ jobs:
           python pyinst.py --onedir
           Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip

+      - name: Verify --update-to
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          foreach ($name in @("yt-dlp","yt-dlp_min")) {
+            Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
+            $version = & "./dist/${name}.exe" --version
+            & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04
+            $downgraded_version = & "./dist/${name}_downgraded.exe" --version
+            if ($version -eq $downgraded_version) {
+              exit 1
+            }
+          }
+
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
@@ -306,6 +365,19 @@ jobs:
         run: |
           python pyinst.py

+      - name: Verify --update-to
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          foreach ($name in @("yt-dlp_x86")) {
+            Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
+            $version = & "./dist/${name}.exe" --version
+            & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04
+            $downgraded_version = & "./dist/${name}_downgraded.exe" --version
+            if ($version -eq $downgraded_version) {
+              exit 1
+            }
+          }
+
       - name: Upload artifacts
         uses: actions/upload-artifact@v3
         with:
@@ -313,7 +385,7 @@ jobs:
           dist/yt-dlp_x86.exe

   meta_files:
-    if: inputs.meta_files && always()
+    if: inputs.meta_files && always() && !cancelled()
     needs:
       - unix
       - linux_arm
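Every new "Verify --update-to" step above follows the same pattern: record the freshly built binary's version, ask a copy of it to downgrade itself to the pinned 2023.03.04 tag, and fail the job if the reported version did not change. A minimal Python sketch of that check; the binary path is an illustrative stand-in, not taken from the workflow:

```python
# Sketch of the per-binary downgrade check the workflow performs.
import shutil
import subprocess
import sys


def verify_update_to(binary: str, target: str = 'yt-dlp/yt-dlp@2023.03.04') -> None:
    downgraded = f'{binary}_downgraded'
    shutil.copy(binary, downgraded)  # copy() preserves the executable bit

    version = subprocess.run([binary, '--version'],
                             capture_output=True, text=True, check=True).stdout.strip()
    # Let the copy replace itself with the pinned release
    subprocess.run([downgraded, '-v', '--update-to', target], check=False)
    downgraded_version = subprocess.run([downgraded, '--version'],
                                        capture_output=True, text=True, check=True).stdout.strip()

    # An unchanged version means --update-to silently did nothing
    if version == downgraded_version:
        sys.exit(1)


if __name__ == '__main__':
    verify_update_to('./yt-dlp')  # illustrative path
```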
.github/workflows/potential-duplicates.yml (vendored, new file, 20 lines)

@@ -0,0 +1,20 @@
+name: Potential Duplicates
+on:
+  issues:
+    types: [opened, edited]
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: wow-actions/potential-duplicates@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          label: potential-duplicate
+          state: all
+          threshold: 0.7
+          comment: |
+            This issue is potentially a duplicate of one of the following issues:
+            {{#issues}}
+            - #{{ number }} ({{ accuracy }}%)
+            {{/issues}}
.github/workflows/publish.yml (vendored, 50 changes)

@@ -2,16 +2,20 @@ name: Publish
 on:
   workflow_call:
     inputs:
-      nightly:
-        default: false
-        required: false
-        type: boolean
+      channel:
+        default: stable
+        required: true
+        type: string
       version:
         required: true
         type: string
       target_commitish:
         required: true
         type: string
+      prerelease:
+        default: false
+        required: true
+        type: boolean
     secrets:
       ARCHIVE_REPO_TOKEN:
         required: false
@@ -34,16 +38,27 @@ jobs:

       - name: Generate release notes
         run: |
+          printf '%s' \
+            '[]' \
+            '(https://github.com/yt-dlp/yt-dlp#installation "Installation instructions") ' \
+            '[]' \
+            '(https://github.com/yt-dlp/yt-dlp/tree/2023.03.04#readme "Documentation") ' \
+            '[]' \
+            '(https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators "Donate") ' \
+            '[]' \
+            '(https://discord.gg/H5MNcFW63r "Discord") ' \
+            ${{ inputs.channel != 'nightly' && '"[]" \
+            "(https://github.com/yt-dlp/yt-dlp-nightly-builds/releases/latest \"Nightly builds\")"' || '' }} \
+            > ./RELEASE_NOTES
+          printf '\n\n' >> ./RELEASE_NOTES
           cat >> ./RELEASE_NOTES << EOF
           #### A description of the various files are in the [README](https://github.com/yt-dlp/yt-dlp#release-files)
           ---
-          <details><summary><h3>Changelog</h3></summary>
-          $(python ./devscripts/make_changelog.py -vv)
-          </details>
+          $(python ./devscripts/make_changelog.py -vv --collapsible)
           EOF
-          echo "**This is an automated nightly pre-release build**" >> ./PRERELEASE_NOTES
-          cat ./RELEASE_NOTES >> ./PRERELEASE_NOTES
-          echo "Generated from: https://github.com/${{ github.repository }}/commit/${{ inputs.target_commitish }}" >> ./ARCHIVE_NOTES
+          printf '%s\n\n' '**This is an automated nightly pre-release build**' >> ./NIGHTLY_NOTES
+          cat ./RELEASE_NOTES >> ./NIGHTLY_NOTES
+          printf '%s\n\n' 'Generated from: https://github.com/${{ github.repository }}/commit/${{ inputs.target_commitish }}' >> ./ARCHIVE_NOTES
           cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES

       - name: Archive nightly release
@@ -51,7 +66,7 @@ jobs:
         env:
           GH_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
           GH_REPO: ${{ vars.ARCHIVE_REPO }}
         if: |
-          inputs.nightly && env.GH_TOKEN != '' && env.GH_REPO != ''
+          inputs.channel == 'nightly' && env.GH_TOKEN != '' && env.GH_REPO != ''
         run: |
           gh release create \
             --notes-file ARCHIVE_NOTES \
@@ -60,7 +75,7 @@ jobs:
             artifact/*

       - name: Prune old nightly release
-        if: inputs.nightly && !vars.ARCHIVE_REPO
+        if: inputs.channel == 'nightly' && !vars.ARCHIVE_REPO
         env:
           GH_TOKEN: ${{ github.token }}
         run: |
@@ -68,14 +83,15 @@ jobs:
           git tag --delete "nightly" || true
           sleep 5  # Enough time to cover deletion race condition

-      - name: Publish release${{ inputs.nightly && ' (nightly)' || '' }}
+      - name: Publish release${{ inputs.channel == 'nightly' && ' (nightly)' || '' }}
         env:
           GH_TOKEN: ${{ github.token }}
-        if: (inputs.nightly && !vars.ARCHIVE_REPO) || !inputs.nightly
+        if: (inputs.channel == 'nightly' && !vars.ARCHIVE_REPO) || inputs.channel != 'nightly'
         run: |
           gh release create \
-            --notes-file ${{ inputs.nightly && 'PRE' || '' }}RELEASE_NOTES \
+            --notes-file ${{ inputs.channel == 'nightly' && 'NIGHTLY_NOTES' || 'RELEASE_NOTES' }} \
             --target ${{ inputs.target_commitish }} \
-            --title "yt-dlp ${{ inputs.nightly && 'nightly ' || '' }}${{ inputs.version }}" \
-            ${{ inputs.nightly && '--prerelease "nightly"' || inputs.version }} \
+            --title "yt-dlp ${{ inputs.channel == 'nightly' && 'nightly ' || '' }}${{ inputs.version }}" \
+            ${{ inputs.prerelease && '--prerelease' || '' }} \
+            ${{ inputs.channel == 'nightly' && '"nightly"' || inputs.version }} \
             artifact/*
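The rewritten notes step boils down to three files built around one shared body: RELEASE_NOTES (badge links plus the collapsible changelog), NIGHTLY_NOTES (nightly preamble plus body) and ARCHIVE_NOTES (source-commit line plus body). A compact sketch of that layout; file names match the workflow, contents are abbreviated placeholders:

```python
# Sketch only: the real badge markdown and changelog come from the workflow.
from pathlib import Path

body = (
    '<badge links>\n\n'
    '#### A description of the various files are in the [README](https://github.com/yt-dlp/yt-dlp#release-files)\n'
    '---\n'
    '<output of: python ./devscripts/make_changelog.py -vv --collapsible>\n'
)
Path('RELEASE_NOTES').write_text(body)
Path('NIGHTLY_NOTES').write_text('**This is an automated nightly pre-release build**\n\n' + body)
Path('ARCHIVE_NOTES').write_text('Generated from: https://github.com/<repo>/commit/<sha>\n\n' + body)
```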
.github/workflows/release-nightly.yml (vendored, 3 changes)

@@ -46,6 +46,7 @@ jobs:
     permissions:
       contents: write
     with:
-      nightly: true
+      channel: nightly
+      prerelease: true
       version: ${{ needs.prepare.outputs.version }}
       target_commitish: ${{ github.sha }}
.github/workflows/release.yml (vendored, 72 changes)

@@ -1,5 +1,22 @@
 name: Release
-on: workflow_dispatch
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: Version tag (YYYY.MM.DD[.REV])
+        required: false
+        default: ''
+        type: string
+      channel:
+        description: Update channel (stable/nightly/...)
+        required: false
+        default: ''
+        type: string
+      prerelease:
+        description: Pre-release
+        default: false
+        type: boolean

 permissions:
   contents: read

@@ -9,8 +26,9 @@ jobs:
     contents: write
     runs-on: ubuntu-latest
     outputs:
+      channel: ${{ steps.set_channel.outputs.channel }}
       version: ${{ steps.update_version.outputs.version }}
-      head_sha: ${{ steps.push_release.outputs.head_sha }}
+      head_sha: ${{ steps.get_target.outputs.head_sha }}

     steps:
       - uses: actions/checkout@v3
@@ -21,10 +39,18 @@ jobs:
         with:
           python-version: "3.10"

+      - name: Set channel
+        id: set_channel
+        run: |
+          CHANNEL="${{ github.repository == 'yt-dlp/yt-dlp' && 'stable' || github.repository }}"
+          echo "channel=${{ inputs.channel || '$CHANNEL' }}" > "$GITHUB_OUTPUT"
+
       - name: Update version
         id: update_version
         run: |
-          python devscripts/update-version.py ${{ vars.PUSH_VERSION_COMMIT == '' && '"$(date -u +"%H%M%S")"' || '' }} | \
+          REVISION="${{ vars.PUSH_VERSION_COMMIT == '' && '$(date -u +"%H%M%S")' || '' }}"
+          REVISION="${{ inputs.prerelease && '$(date -u +"%H%M%S")' || '$REVISION' }}"
+          python devscripts/update-version.py ${{ inputs.version || '$REVISION' }} | \
             grep -Po "version=\d+\.\d+\.\d+(\.\d+)?" >> "$GITHUB_OUTPUT"

       - name: Update documentation
@@ -39,6 +65,7 @@ jobs:

       - name: Push to release
         id: push_release
+        if: ${{ !inputs.prerelease }}
         run: |
           git config --global user.name github-actions
           git config --global user.email github-actions@example.com
@@ -46,14 +73,30 @@ jobs:
           git commit -m "Release ${{ steps.update_version.outputs.version }}" \
             -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl"
           git push origin --force ${{ github.event.ref }}:release
+
+      - name: Get target commitish
+        id: get_target
+        run: |
           echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"

       - name: Update master
-        if: vars.PUSH_VERSION_COMMIT != ''
+        if: vars.PUSH_VERSION_COMMIT != '' && !inputs.prerelease
         run: git push origin ${{ github.event.ref }}

-  publish_pypi_homebrew:
+  build:
     needs: prepare
+    uses: ./.github/workflows/build.yml
+    with:
+      version: ${{ needs.prepare.outputs.version }}
+      channel: ${{ needs.prepare.outputs.channel }}
+    permissions:
+      contents: read
+      packages: write # For package cache
+    secrets:
+      GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
+
+  publish_pypi_homebrew:
+    needs: [prepare, build]
     runs-on: ubuntu-latest

     steps:
@@ -77,7 +120,7 @@ jobs:
         env:
           TWINE_USERNAME: __token__
           TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
-        if: env.TWINE_PASSWORD != ''
+        if: env.TWINE_PASSWORD != '' && !inputs.prerelease
         run: |
           rm -rf dist/*
           make pypi-files
@@ -89,7 +132,7 @@ jobs:
         env:
           BREW_TOKEN: ${{ secrets.BREW_TOKEN }}
           PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-        if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != ''
+        if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' && !inputs.prerelease
         uses: actions/checkout@v3
         with:
           repository: yt-dlp/homebrew-taps
@@ -100,7 +143,7 @@ jobs:
         env:
           BREW_TOKEN: ${{ secrets.BREW_TOKEN }}
           PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-        if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != ''
+        if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' && !inputs.prerelease
         run: |
           python devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ needs.prepare.outputs.version }}"
           git -C taps/ config user.name github-actions
@@ -108,22 +151,13 @@ jobs:
           git -C taps/ commit -am 'yt-dlp: ${{ needs.prepare.outputs.version }}'
           git -C taps/ push

-  build:
-    needs: prepare
-    uses: ./.github/workflows/build.yml
-    with:
-      version: ${{ needs.prepare.outputs.version }}
-    permissions:
-      contents: read
-      packages: write # For package cache
-    secrets:
-      GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
-
   publish:
     needs: [prepare, build]
     uses: ./.github/workflows/publish.yml
     permissions:
       contents: write
     with:
+      channel: ${{ needs.prepare.outputs.channel }}
+      prerelease: ${{ inputs.prerelease }}
       version: ${{ needs.prepare.outputs.version }}
       target_commitish: ${{ needs.prepare.outputs.head_sha }}
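The reworked "Update version" step derives a HHMMSS revision (always for prereleases, otherwise only when no version commit will be pushed) and lets an explicit `inputs.version` override everything. Assuming `devscripts/update-version.py` appends the given revision to the UTC date, as its use and the `grep` on `version=` here imply, the resulting tag has this shape:

```python
# Assumption: update-version.py produces YYYY.MM.DD[.REV] from the current
# UTC date plus an optional revision; this sketch only mirrors that shape.
from datetime import datetime, timezone

now = datetime.now(timezone.utc)
revision = now.strftime('%H%M%S')  # used for prereleases or when no version commit is pushed
print(f'{now.strftime("%Y.%m.%d")}.{revision}')  # e.g. 2023.03.04.142356
```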
CONTRIBUTING.md

@@ -79,7 +79,7 @@ ### Are you using the latest version?

 ### Is the issue already documented?

-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2021.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, subscribe to it to be notified when there is any progress. Unless you have something useful to add to the conversation, please refrain from commenting.

 Additionally, it is also helpful to see if the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have already been reported in youtube-dl (but not in our issue tracker), links to them can be included in your issue report here.

@@ -246,7 +246,7 @@ ## yt-dlp coding conventions

 This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.

-Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old yt-dlp versions working. Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the the extractor will remain broken.
+Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old yt-dlp versions working. Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the extractor will remain broken.

 ### Mandatory and optional metafields
Collaborators.md

@@ -8,7 +8,7 @@ # Collaborators
 ## [pukkandan](https://github.com/pukkandan)

 [](https://ko-fi.com/pukkandan)
 [](https://github.com/sponsors/pukkandan)

 * Owner of the fork

@@ -26,7 +26,7 @@ ## [shirt](https://github.com/shirt-dev)

 ## [coletdjnz](https://github.com/coletdjnz)

 [](https://github.com/sponsors/coletdjnz)

 * Improved plugin architecture
 * YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements

@@ -44,7 +44,7 @@ ## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>
 * Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc

-## [Lesmiscore](https://github.com/Lesmiscore) <sub><sup>(nao20010128nao)</sup></sub>
+## [Lesmiscore](https://github.com/Lesmiscore)

 **Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s
 **Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr

@@ -64,7 +64,7 @@ ## [bashonly](https://github.com/bashonly)

 ## [Grub4K](https://github.com/Grub4K)

 [](https://ko-fi.com/Grub4K) [](https://github.com/sponsors/Grub4K)

 * `--update-to`, automated release, nightly builds
 * Rework internals like `traverse_obj`, various core refactors and bug fixes
Makefile (2 changes)

@@ -74,7 +74,7 @@ offlinetest: codetest
 	$(PYTHON) -m pytest -k "not download"

 # XXX: This is hard to maintain
-CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/dependencies
+CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies
 yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
 	mkdir -p zip
 	for d in $(CODE_FOLDERS) ; do \
README.md (51 changes)

@@ -85,7 +85,7 @@ # NEW FEATURES
 * **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.

 * **YouTube improvements**:
-    * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
+    * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
     * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
     * Supports some (but not all) age-gated content without cookies
     * Download livestreams from the start using `--live-from-start` (*experimental*)

@@ -179,13 +179,13 @@ # INSTALLATION
 [](https://github.com/yt-dlp/yt-dlp/releases)
 <!-- MANPAGE: END EXCLUDED SECTION -->

-You can install yt-dlp using [the binaries](#release-files), [PIP](https://pypi.org/project/yt-dlp) or one using a third-party package manager. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for detailed instructions
+You can install yt-dlp using [the binaries](#release-files), [pip](https://pypi.org/project/yt-dlp) or one using a third-party package manager. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for detailed instructions

 ## UPDATE
 You can use `yt-dlp -U` to update if you are using the [release binaries](#release-files)

-If you [installed with PIP](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program
+If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program

 For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation

@@ -196,12 +196,15 @@ ## UPDATE
 The `nightly` channel has releases built after each push to the master branch, and will have the most recent fixes and additions, but also have more risk of regressions. They are available in [their own repo](https://github.com/yt-dlp/yt-dlp-nightly-builds/releases).

 When using `--update`/`-U`, a release binary will only update to its current channel.
-This release channel can be changed by using the `--update-to` option. `--update-to` can also be used to upgrade or downgrade to specific tags from a channel.
+`--update-to CHANNEL` can be used to switch to a different channel when a newer version is available. `--update-to [CHANNEL@]TAG` can also be used to upgrade or downgrade to specific tags from a channel.
+
+You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to though, there is no verification done for binaries from different repositories.

 Example usage:
 * `yt-dlp --update-to nightly` change to `nightly` channel and update to its latest release
 * `yt-dlp --update-to stable@2023.02.17` upgrade/downgrade to release to `stable` channel tag `2023.02.17`
 * `yt-dlp --update-to 2023.01.06` upgrade/downgrade to tag `2023.01.06` if it exists on the current channel
+* `yt-dlp --update-to example/yt-dlp@2023.03.01` upgrade/downgrade to the release from the `example/yt-dlp` repository, tag `2023.03.01`

 <!-- MANPAGE: BEGIN EXCLUDED SECTION -->
 ## RELEASE FILES

@@ -360,10 +363,10 @@ ## General Options:
     -U, --update                    Update this program to the latest version
     --no-update                     Do not check for updates (default)
     --update-to [CHANNEL]@[TAG]     Upgrade/downgrade to a specific version.
-                                    CHANNEL and TAG defaults to "stable" and
-                                    "latest" respectively if omitted; See
-                                    "UPDATE" for details. Supported channels:
-                                    stable, nightly
+                                    CHANNEL can be a repository as well. CHANNEL
+                                    and TAG default to "stable" and "latest"
+                                    respectively if omitted; See "UPDATE" for
+                                    details. Supported channels: stable, nightly
     -i, --ignore-errors             Ignore download and postprocessing errors.
                                     The download will be considered successful
                                     even if the postprocessing fails

@@ -409,7 +412,8 @@ ## General Options:
                                     configuration files
     --flat-playlist                 Do not extract the videos of a playlist,
                                     only list them
-    --no-flat-playlist              Extract the videos of a playlist
+    --no-flat-playlist              Fully extract the videos of a playlist
+                                    (default)
     --live-from-start               Download livestreams from the start.
                                     Currently only supported for YouTube
                                     (Experimental)

@@ -421,8 +425,12 @@ ## General Options:
     --no-wait-for-video             Do not wait for scheduled streams (default)
     --mark-watched                  Mark videos watched (even with --simulate)
     --no-mark-watched               Do not mark videos watched (default)
-    --no-colors                     Do not emit color codes in output (Alias:
-                                    --no-colours)
+    --color [STREAM:]POLICY         Whether to emit color codes in output,
+                                    optionally prefixed by the STREAM (stdout or
+                                    stderr) to apply the setting to. Can be one
+                                    of "always", "auto" (default), "never", or
+                                    "no_color" (use non color terminal
+                                    sequences). Can be used multiple times
     --compat-options OPTS           Options that can help keep compatibility
                                     with youtube-dl or youtube-dlc
                                     configurations by reverting some of the

@@ -465,9 +473,9 @@ ## Geo-restriction:
                                     downloading
     --xff VALUE                     How to fake X-Forwarded-For HTTP header to
                                     try bypassing geographic restriction. One of
-                                    "default" (Only when known to be useful),
-                                    "never", a two-letter ISO 3166-2 country
-                                    code, or an IP block in CIDR notation
+                                    "default" (only when known to be useful),
+                                    "never", an IP block in CIDR notation, or a
+                                    two-letter ISO 3166-2 country code

 ## Video Selection:
     -I, --playlist-items ITEM_SPEC  Comma separated playlist_index of the items

@@ -514,7 +522,7 @@ ## Video Selection:
                                     dogs" (caseless). Use "--match-filter -" to
                                     interactively ask whether to download each
                                     video
-    --no-match-filter               Do not use any --match-filter (default)
+    --no-match-filters              Do not use any --match-filter (default)
     --break-match-filters FILTER    Same as "--match-filters" but stops the
                                     download process when a video is rejected
     --no-break-match-filters        Do not use any --break-match-filters (default)

@@ -1709,7 +1717,7 @@ # MODIFYING METADATA
 This option also has a few special uses:

-* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description
+* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description

 * You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values.

@@ -1835,6 +1843,12 @@ #### rokfinchannel
 #### twitter
 * `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed

+### wrestleuniverse
+* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
+
+#### twitchstream (Twitch)
+* `client_id`: Client ID value to be sent with GraphQL requests, e.g. `twitchstream:client_id=kimne78kx3ncx6brgo4mv6wki5h1ko`
+
 **Note**: These options may be changed/removed in the future without concern for backward compatibility

 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

@@ -1880,7 +1894,7 @@ ## Installing Plugins
 * **System Plugins**
    * `/etc/yt-dlp/plugins/<package name>/yt_dlp_plugins/`
    * `/etc/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
-2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location:
+2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location (recommended for portable installations):
    * Binary: where `<root-dir>/yt-dlp.exe`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
    * Source: where `<root-dir>/yt_dlp/__main__.py`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`

@@ -2068,7 +2082,7 @@ #### Use a custom format selector
 ```python
 import yt_dlp

-URL = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

 def format_selector(ctx):
     """ Select the best video and the best audio that won't result in an mkv.

@@ -2141,6 +2155,7 @@ #### Redundant options
     --playlist-end NUMBER           -I :NUMBER
     --playlist-reverse              -I ::-1
     --no-playlist-reverse           Default
+    --no-colors                     --color no_color

 #### Not recommended
devscripts/cli_to_api.py (new file, 48 lines)

@@ -0,0 +1,48 @@
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import yt_dlp
+import yt_dlp.options
+
+create_parser = yt_dlp.options.create_parser
+
+
+def parse_patched_options(opts):
+    patched_parser = create_parser()
+    patched_parser.defaults.update({
+        'ignoreerrors': False,
+        'retries': 0,
+        'fragment_retries': 0,
+        'extract_flat': False,
+        'concat_playlist': 'never',
+    })
+    yt_dlp.options.__dict__['create_parser'] = lambda: patched_parser
+    try:
+        return yt_dlp.parse_options(opts)
+    finally:
+        yt_dlp.options.__dict__['create_parser'] = create_parser
+
+
+default_opts = parse_patched_options([]).ydl_opts
+
+
+def cli_to_api(opts, cli_defaults=False):
+    opts = (yt_dlp.parse_options if cli_defaults else parse_patched_options)(opts).ydl_opts
+
+    diff = {k: v for k, v in opts.items() if default_opts[k] != v}
+    if 'postprocessors' in diff:
+        diff['postprocessors'] = [pp for pp in diff['postprocessors']
+                                  if pp not in default_opts['postprocessors']]
+    return diff
+
+
+if __name__ == '__main__':
+    from pprint import pprint
+
+    print('\nThe arguments passed translate to:\n')
+    pprint(cli_to_api(sys.argv[1:]))
+    print('\nCombining these with the CLI defaults gives:\n')
+    pprint(cli_to_api(sys.argv[1:], True))
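A usage sketch for the new helper; run it from a yt-dlp source checkout. The example flags are arbitrary, and the printed dict depends on the yt-dlp version, so the output shown is illustrative:

```python
# Illustrative invocation of devscripts/cli_to_api.py.
import subprocess
import sys

subprocess.run([sys.executable, 'devscripts/cli_to_api.py', '-f', 'best', '--no-mtime'],
               check=True)
# The arguments passed translate to:
#
# {'format': 'best', 'updatetime': False}
```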
devscripts/make_changelog.py

@@ -26,7 +26,6 @@


 class CommitGroup(enum.Enum):
-    UPSTREAM = None
     PRIORITY = 'Important'
     CORE = 'Core'
     EXTRACTOR = 'Extractor'

@@ -34,6 +33,11 @@ class CommitGroup(enum.Enum):
     POSTPROCESSOR = 'Postprocessor'
     MISC = 'Misc.'

+    @classmethod
+    @property
+    def ignorable_prefixes(cls):
+        return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')
+
     @classmethod
     @lru_cache
     def commit_lookup(cls):
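The stacked decorators added above make `ignorable_prefixes` readable directly on the class, with no instance or call needed. A standalone sketch of the pattern; note that chaining `@classmethod` with `@property` works on Python 3.9/3.10 but was deprecated in 3.11, so it is version-dependent:

```python
import enum


class Group(enum.Enum):
    CORE = 'Core'

    @classmethod
    @property
    def ignorable_prefixes(cls):
        # Evaluated on attribute access, with the class passed as `cls`
        return ('core', 'upstream')


print(Group.ignorable_prefixes)  # ('core', 'upstream')
```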
@@ -41,7 +45,6 @@ def commit_lookup(cls):
             name: group
             for group, names in {
                 cls.PRIORITY: {''},
-                cls.UPSTREAM: {'upstream'},
                 cls.CORE: {
                     'aes',
                     'cache',

@@ -54,6 +57,7 @@ def commit_lookup(cls):
                     'outtmpl',
                     'plugins',
                     'update',
+                    'upstream',
                     'utils',
                 },
                 cls.MISC: {

@@ -111,22 +115,36 @@ def key(self):
         return ((self.details or '').lower(), self.sub_details, self.message)


+def unique(items):
+    return sorted({item.strip().lower(): item for item in items if item}.values())
+
+
 class Changelog:
     MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
+    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

-    def __init__(self, groups, repo):
+    def __init__(self, groups, repo, collapsible=False):
         self._groups = groups
         self._repo = repo
+        self._collapsible = collapsible

     def __str__(self):
         return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

     def _format_groups(self, groups):
+        first = True
         for item in CommitGroup:
+            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
+                first = False
+                yield '\n<details><summary><h3>Changelog</h3></summary>\n'
+
             group = groups[item]
             if group:
                 yield self.format_module(item.value, group)

+        if self._collapsible:
+            yield '\n</details>'
+
     def format_module(self, name, group):
         result = f'\n#### {name} changes\n' if name else '\n'
         return result + '\n'.join(self._format_group(group))
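The new `unique()` helper drops falsy items, dedupes case-insensitively (the last spelling seen wins) and sorts the survivors; a quick illustration:

```python
def unique(items):
    return sorted({item.strip().lower(): item for item in items if item}.values())


print(unique(['Grub4K', 'pukkandan', 'PUKKANDAN', None]))
# ['Grub4K', 'PUKKANDAN']  (None dropped, 'pukkandan' collapsed into the later spelling)
```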
@@ -137,62 +155,52 @@ def _format_group(self, group):
         for _, items in detail_groups:
             items = list(items)
             details = items[0].details
-            if not details:
-                indent = ''
-            else:
-                yield f'- {details}'
-                indent = '\t'

             if details == 'cleanup':
-                items, cleanup_misc_items = self._filter_cleanup_misc_items(items)
+                items = self._prepare_cleanup_misc_items(items)
+
+            prefix = '-'
+            if details:
+                if len(items) == 1:
+                    prefix = f'- **{details}**:'
+                else:
+                    yield f'- **{details}**'
+                    prefix = '\t-'

             sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
             for sub_details, entries in sub_detail_groups:
                 if not sub_details:
                     for entry in entries:
-                        yield f'{indent}- {self.format_single_change(entry)}'
+                        yield f'{prefix} {self.format_single_change(entry)}'
                     continue

                 entries = list(entries)
-                prefix = f'{indent}- {", ".join(entries[0].sub_details)}'
+                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                 if len(entries) == 1:
-                    yield f'{prefix}: {self.format_single_change(entries[0])}'
+                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                     continue

-                yield prefix
+                yield sub_prefix
                 for entry in entries:
-                    yield f'{indent}\t- {self.format_single_change(entry)}'
+                    yield f'\t{prefix} {self.format_single_change(entry)}'

-        if details == 'cleanup' and cleanup_misc_items:
-            yield from self._format_cleanup_misc_sub_group(cleanup_misc_items)
-
-    def _filter_cleanup_misc_items(self, items):
+    def _prepare_cleanup_misc_items(self, items):
         cleanup_misc_items = defaultdict(list)
-        non_misc_items = []
+        sorted_items = []
         for item in items:
             if self.MISC_RE.search(item.message):
                 cleanup_misc_items[tuple(item.commit.authors)].append(item)
             else:
-                non_misc_items.append(item)
+                sorted_items.append(item)

-        return non_misc_items, cleanup_misc_items
+        for commit_infos in cleanup_misc_items.values():
+            sorted_items.append(CommitInfo(
+                'cleanup', ('Miscellaneous',), ', '.join(
+                    self._format_message_link(None, info.commit.hash)
+                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
+                [], Commit(None, '', commit_infos[0].commit.authors), []))
+
+        return sorted_items

-    def _format_cleanup_misc_sub_group(self, group):
-        prefix = '\t- Miscellaneous'
-        if len(group) == 1:
-            yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}'
-            return
-
-        yield prefix
-        for message in self._format_cleanup_misc_items(group):
-            yield f'\t\t- {message}'
-
-    def _format_cleanup_misc_items(self, group):
-        for authors, infos in group.items():
-            message = ', '.join(
-                self._format_message_link(None, info.commit.hash)
-                for info in sorted(infos, key=lambda item: item.commit.hash or ''))
-            yield f'{message} by {self._format_authors(authors)}'

     def format_single_change(self, info):
         message = self._format_message_link(info.message, info.commit.hash)
@@ -236,12 +244,8 @@ class CommitRange:
     AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
     MESSAGE_RE = re.compile(r'''
-        (?:\[
-            (?P<prefix>[^\]\/:,]+)
-            (?:/(?P<details>[^\]:,]+))?
-            (?:[:,](?P<sub_details>[^\]]+))?
-        \]\ )?
-        (?:(?P<sub_details_alt>`?[^:`]+`?): )?
+        (?:\[(?P<prefix>[^\]]+)\]\ )?
+        (?:(?P<sub_details>`?[^:`]+`?): )?
         (?P<message>.+?)
         (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
         ''', re.VERBOSE | re.DOTALL)
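To make the simplification concrete, here is a self-contained sketch that reuses the new pattern verbatim and parses a commit subject written in the conventional style (the subject itself is made up):

import re

# The new MESSAGE_RE from the diff above, reproduced for illustration
MESSAGE_RE = re.compile(r'''
    (?:\[(?P<prefix>[^\]]+)\]\ )?
    (?:(?P<sub_details>`?[^:`]+`?): )?
    (?P<message>.+?)
    (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
    ''', re.VERBOSE | re.DOTALL)

# A made-up commit subject in the usual bracketed-prefix style
m = MESSAGE_RE.fullmatch('[extractor/youtube] tab: Fix playlist extraction (#1234, #5678)')
assert m.group('prefix') == 'extractor/youtube'      # everything inside [...] is now one group
assert m.group('sub_details') == 'tab'
assert m.group('message').strip() == 'Fix playlist extraction'  # groups() strips the message
assert m.group('issues') == '#1234, #5678'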
@@ -340,60 +344,76 @@ def apply_overrides(self, overrides):
         self._commits = {key: value for key, value in reversed(self._commits.items())}
 
     def groups(self):
-        groups = defaultdict(list)
+        group_dict = defaultdict(list)
         for commit in self:
-            upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short)
+            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
             if upstream_re:
-                commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}'
+                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'
 
             match = self.MESSAGE_RE.fullmatch(commit.short)
             if not match:
                 logger.error(f'Error parsing short commit message: {commit.short!r}')
                 continue
 
-            prefix, details, sub_details, sub_details_alt, message, issues = match.groups()
-            group = None
-            if prefix:
-                if prefix == 'priority':
-                    prefix, _, details = (details or '').partition('/')
-                    logger.debug(f'Priority: {message!r}')
-                    group = CommitGroup.PRIORITY
-
-                if not details and prefix:
-                    if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'):
-                        logger.debug(f'Replaced details with {prefix!r}')
-                        details = prefix or None
-
-                if details == 'common':
-                    details = None
-
-                if details:
-                    details = details.strip()
-
-            else:
-                group = CommitGroup.CORE
-
-            sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.replace(':', ',')
-            sub_details = tuple(filter(None, map(str.strip, sub_details.split(','))))
-
+            prefix, sub_details_alt, message, issues = match.groups()
             issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []
 
+            if prefix:
+                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
+                group = next(iter(filter(None, groups)), None)
+                details = ', '.join(unique(details))
+                sub_details = list(itertools.chain.from_iterable(sub_details))
+            else:
+                group = CommitGroup.CORE
+                details = None
+                sub_details = []
+
+            if sub_details_alt:
+                sub_details.append(sub_details_alt)
+            sub_details = tuple(unique(sub_details))
+
             if not group:
-                group = CommitGroup.get(prefix.lower())
-                if not group:
-                    if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
-                        group = CommitGroup.EXTRACTOR
-                    else:
-                        group = CommitGroup.POSTPROCESSOR
-                    logger.warning(f'Failed to map {commit.short!r}, selected {group.name}')
+                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
+                    group = CommitGroup.EXTRACTOR
+                else:
+                    group = CommitGroup.POSTPROCESSOR
+                logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}')
 
             commit_info = CommitInfo(
                 details, sub_details, message.strip(),
                 issues, commit, self._fixes[commit.hash])
-            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
-            groups[group].append(commit_info)
 
-        return groups
+            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
+            group_dict[group].append(commit_info)
+
+        return group_dict
+
+    @staticmethod
+    def details_from_prefix(prefix):
+        if not prefix:
+            return CommitGroup.CORE, None, ()
+
+        prefix, _, details = prefix.partition('/')
+        prefix = prefix.strip().lower()
+        details = details.strip()
+
+        group = CommitGroup.get(prefix)
+        if group is CommitGroup.PRIORITY:
+            prefix, _, details = details.partition('/')
+
+        if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
+            logger.debug(f'Replaced details with {prefix!r}')
+            details = prefix or None
+
+        if details == 'common':
+            details = None
+
+        if details:
+            details, *sub_details = details.split(':')
+        else:
+            sub_details = []
+
+        return group, details, sub_details
 
 
 def get_new_contributors(contributors_path, commits):
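The prefix handling now lives in `details_from_prefix`; below is a rough standalone mock of its string handling (the real version returns a `CommitGroup` member and consults `ignorable_prefixes`, which are stubbed here):

# Standalone sketch of the prefix parsing introduced above; the group lookup is mocked.
def details_from_prefix(prefix, known_groups=('priority', 'core', 'extractor')):
    prefix, _, details = prefix.partition('/')   # 'extractor/youtube:tab' -> 'extractor', 'youtube:tab'
    prefix = prefix.strip().lower()
    details = details.strip()
    group = prefix if prefix in known_groups else None
    if details:
        details, *sub_details = details.split(':')  # 'youtube:tab' -> 'youtube', ['tab']
    else:
        sub_details = []
    return group, details, sub_details

assert details_from_prefix('extractor/youtube:tab') == ('extractor', 'youtube', ['tab'])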
@@ -444,6 +464,9 @@ def get_new_contributors(contributors_path, commits):
     parser.add_argument(
         '--repo', default='yt-dlp/yt-dlp',
         help='the github repository to use for the operations (default: %(default)s)')
+    parser.add_argument(
+        '--collapsible', action='store_true',
+        help='make changelog collapsible (default: %(default)s)')
     args = parser.parse_args()
 
     logging.basicConfig(
@@ -467,4 +490,4 @@ def get_new_contributors(contributors_path, commits):
     write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
     logger.info(f'New contributors: {", ".join(new_contributors)}')
 
-    print(Changelog(commits.groups(), args.repo))
+    print(Changelog(commits.groups(), args.repo, args.collapsible))
@@ -51,7 +51,7 @@ def get_git_head():
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Update the version.py file')
     parser.add_argument(
-        '-c', '--channel', choices=['stable', 'nightly'], default='stable',
+        '-c', '--channel', default='stable',
         help='Select update channel (default: %(default)s)')
     parser.add_argument(
         '-o', '--output', default='yt_dlp/version.py',
@@ -8,6 +8,7 @@ ignore = E402,E501,E731,E741,W503
 max_line_length = 120
 per_file_ignores =
     devscripts/lazy_load_template.py: F401
+    yt_dlp/utils/__init__.py: F401, F403
 
 
 [autoflake]
@@ -194,8 +194,8 @@ def sanitize_got_info_dict(got_dict):
     'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',
 
     # Auto-generated
-    'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch',
-    'fulltitle', 'extractor', 'extractor_key', 'filepath', 'infojson_filename', 'original_url', 'n_entries',
+    'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch', 'n_entries',
+    'fulltitle', 'extractor', 'extractor_key', 'filename', 'filepath', 'infojson_filename', 'original_url',
 
     # Only live_status needs to be checked
     'is_live', 'was_live',
@@ -10,7 +10,6 @@
 
 import copy
 import json
-import urllib.error
 
 from test.helper import FakeYDL, assertRegexpMatches
 from yt_dlp import YoutubeDL
@@ -757,7 +756,7 @@ def expect_same_infodict(out):
         test('%(id)r %(height)r', "'1234' 1080")
         test('%(ext)s-%(ext|def)d', 'mp4-def')
         test('%(width|0)04d', '0000')
-        test('a%(width|)d', 'a', outtmpl_na_placeholder='none')
+        test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none')
 
         FORMATS = self.outtmpl_info['formats']
         sanitize = lambda x: x.replace(':', '：').replace('"', '＂').replace('\n', ' ')
@@ -871,12 +870,12 @@ def test_postprocessors(self):
 
         class SimplePP(PostProcessor):
             def run(self, info):
-                with open(audiofile, 'wt') as f:
+                with open(audiofile, 'w') as f:
                     f.write('EXAMPLE')
                 return [info['filepath']], info
 
         def run_pp(params, PP):
-            with open(filename, 'wt') as f:
+            with open(filename, 'w') as f:
                 f.write('EXAMPLE')
             ydl = YoutubeDL(params)
             ydl.add_post_processor(PP())
@@ -895,7 +894,7 @@ def run_pp(params, PP):
 
         class ModifierPP(PostProcessor):
             def run(self, info):
-                with open(info['filepath'], 'wt') as f:
+                with open(info['filepath'], 'w') as f:
                     f.write('MODIFIED')
                 return [], info
 
@@ -1097,11 +1096,6 @@ def test_selection(params, expected_ids, evaluate_all=False):
         test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True)
         test_selection({'playlist_items': '-15::15'}, [], True)
 
-    def test_urlopen_no_file_protocol(self):
-        # see https://github.com/ytdl-org/youtube-dl/issues/8227
-        ydl = YDL()
-        self.assertRaises(urllib.error.URLError, ydl.urlopen, 'file:///etc/passwd')
-
     def test_do_not_override_ie_key_in_url_transparent(self):
         ydl = YDL()
@@ -11,7 +11,7 @@
 import re
 import tempfile
 
-from yt_dlp.utils import YoutubeDLCookieJar
+from yt_dlp.cookies import YoutubeDLCookieJar
 
 
 class TestYoutubeDLCookieJar(unittest.TestCase):
@@ -47,6 +47,12 @@ def test_malformed_cookies(self):
         # will be ignored
         self.assertFalse(cookiejar._cookies)
 
+    def test_get_cookie_header(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        header = cookiejar.get_cookie_header('https://www.foobar.foobar')
+        self.assertIn('HTTPONLY_COOKIE', header)
+
 
 if __name__ == '__main__':
     unittest.main()
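For context, `get_cookie_header` returns the serialized `Cookie:` header value the jar would send for a given URL, HttpOnly cookies included. A minimal usage sketch, assuming a Netscape-format cookie file at a hypothetical path:

# Minimal sketch; 'cookies.txt' and the cookie names are hypothetical
from yt_dlp.cookies import YoutubeDLCookieJar

jar = YoutubeDLCookieJar('cookies.txt')
jar.load(ignore_discard=True, ignore_expires=True)
# e.g. 'SID=abc; HTTPONLY_COOKIE=1' -- whatever the jar holds for this origin
print(jar.get_cookie_header('https://example.com'))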
@@ -49,32 +49,38 @@ def test_get_desktop_environment(self):
         """ based on https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util_unittest.cc """
         test_cases = [
             ({}, _LinuxDesktopEnvironment.OTHER),
+            ({'DESKTOP_SESSION': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER),
+            ({'XDG_CURRENT_DESKTOP': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER),
 
             ({'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME),
             ({'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME),
-            ({'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE),
-            ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE),
+            ({'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
+            ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
             ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
 
             ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
-            ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE),
+            ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3),
+            ({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
 
             ({'XDG_CURRENT_DESKTOP': 'X-Cinnamon'}, _LinuxDesktopEnvironment.CINNAMON),
+            ({'XDG_CURRENT_DESKTOP': 'Deepin'}, _LinuxDesktopEnvironment.DEEPIN),
             ({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME),
             ({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
             ({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
 
             ({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME),
-            ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE),
-            ({'XDG_CURRENT_DESKTOP': 'KDE'}, _LinuxDesktopEnvironment.KDE),
+            ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5),
+            ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '6'}, _LinuxDesktopEnvironment.KDE6),
+            ({'XDG_CURRENT_DESKTOP': 'KDE'}, _LinuxDesktopEnvironment.KDE4),
             ({'XDG_CURRENT_DESKTOP': 'Pantheon'}, _LinuxDesktopEnvironment.PANTHEON),
+            ({'XDG_CURRENT_DESKTOP': 'UKUI'}, _LinuxDesktopEnvironment.UKUI),
             ({'XDG_CURRENT_DESKTOP': 'Unity'}, _LinuxDesktopEnvironment.UNITY),
             ({'XDG_CURRENT_DESKTOP': 'Unity:Unity7'}, _LinuxDesktopEnvironment.UNITY),
             ({'XDG_CURRENT_DESKTOP': 'Unity:Unity8'}, _LinuxDesktopEnvironment.UNITY),
         ]
 
         for env, expected_desktop_environment in test_cases:
-            self.assertEqual(_get_linux_desktop_environment(env), expected_desktop_environment)
+            self.assertEqual(_get_linux_desktop_environment(env, Logger()), expected_desktop_environment)
 
     def test_chrome_cookie_decryptor_linux_derive_key(self):
         key = LinuxChromeCookieDecryptor.derive_key(b'abc')
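The new KDE cases encode a version-detection rule. The sketch below mirrors only the branching these test cases imply; string results stand in for the real `_LinuxDesktopEnvironment` enum, and the actual implementation handles many more desktops:

# Hedged sketch of the KDE branching the test cases above assert
def kde_flavor(env):
    if env.get('XDG_CURRENT_DESKTOP') == 'KDE':
        version = env.get('KDE_SESSION_VERSION')
        return {'5': 'KDE5', '6': 'KDE6'}.get(version, 'KDE4')  # no version -> KDE4
    if env.get('DESKTOP_SESSION') == 'kde4':
        return 'KDE4'
    if env.get('DESKTOP_SESSION') == 'kde' or env.get('KDE_FULL_SESSION'):
        return 'KDE3'  # legacy sessions default to KDE3
    return 'OTHER'

assert kde_flavor({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}) == 'KDE5'
assert kde_flavor({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}) == 'KDE4'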
@@ -7,40 +7,190 @@
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import gzip
+import http.cookiejar
 import http.server
+import io
+import pathlib
 import ssl
+import tempfile
 import threading
+import urllib.error
 import urllib.request
+import zlib
 
 from test.helper import http_server_port
 from yt_dlp import YoutubeDL
+from yt_dlp.dependencies import brotli
+from yt_dlp.utils import sanitized_Request, urlencode_postdata
+
+from .helper import FakeYDL
+
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
+    protocol_version = 'HTTP/1.1'
+
     def log_message(self, format, *args):
         pass
 
+    def _headers(self):
+        payload = str(self.headers).encode('utf-8')
+        self.send_response(200)
+        self.send_header('Content-Type', 'application/json')
+        self.send_header('Content-Length', str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def _redirect(self):
+        self.send_response(int(self.path[len('/redirect_'):]))
+        self.send_header('Location', '/method')
+        self.send_header('Content-Length', '0')
+        self.end_headers()
+
+    def _method(self, method, payload=None):
+        self.send_response(200)
+        self.send_header('Content-Length', str(len(payload or '')))
+        self.send_header('Method', method)
+        self.end_headers()
+        if payload:
+            self.wfile.write(payload)
+
+    def _status(self, status):
+        payload = f'<html>{status} NOT FOUND</html>'.encode()
+        self.send_response(int(status))
+        self.send_header('Content-Type', 'text/html; charset=utf-8')
+        self.send_header('Content-Length', str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def _read_data(self):
+        if 'Content-Length' in self.headers:
+            return self.rfile.read(int(self.headers['Content-Length']))
+
+    def do_POST(self):
+        data = self._read_data()
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('POST', data)
+        elif self.path.startswith('/headers'):
+            self._headers()
+        else:
+            self._status(404)
+
+    def do_HEAD(self):
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('HEAD')
+        else:
+            self._status(404)
+
+    def do_PUT(self):
+        data = self._read_data()
+        if self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('PUT', data)
+        else:
+            self._status(404)
+
     def do_GET(self):
         if self.path == '/video.html':
+            payload = b'<html><video src="/vid.mp4" /></html>'
             self.send_response(200)
             self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.send_header('Content-Length', str(len(payload)))  # required for persistent connections
             self.end_headers()
-            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+            self.wfile.write(payload)
         elif self.path == '/vid.mp4':
+            payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
             self.send_response(200)
             self.send_header('Content-Type', 'video/mp4')
+            self.send_header('Content-Length', str(len(payload)))
             self.end_headers()
-            self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
+            self.wfile.write(payload)
         elif self.path == '/%E4%B8%AD%E6%96%87.html':
+            payload = b'<html><video src="/vid.mp4" /></html>'
             self.send_response(200)
             self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.send_header('Content-Length', str(len(payload)))
             self.end_headers()
-            self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
+            self.wfile.write(payload)
+        elif self.path == '/%c7%9f':
+            payload = b'<html><video src="/vid.mp4" /></html>'
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.send_header('Content-Length', str(len(payload)))
+            self.end_headers()
+            self.wfile.write(payload)
+        elif self.path.startswith('/redirect_'):
+            self._redirect()
+        elif self.path.startswith('/method'):
+            self._method('GET')
+        elif self.path.startswith('/headers'):
+            self._headers()
+        elif self.path == '/trailing_garbage':
+            payload = b'<html><video src="/vid.mp4" /></html>'
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.send_header('Content-Encoding', 'gzip')
+            buf = io.BytesIO()
+            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+                f.write(payload)
+            compressed = buf.getvalue() + b'trailing garbage'
+            self.send_header('Content-Length', str(len(compressed)))
+            self.end_headers()
+            self.wfile.write(compressed)
+        elif self.path == '/302-non-ascii-redirect':
+            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
+            self.send_response(301)
+            self.send_header('Location', new_url)
+            self.send_header('Content-Length', '0')
+            self.end_headers()
+        elif self.path == '/content-encoding':
+            encodings = self.headers.get('ytdl-encoding', '')
+            payload = b'<html><video src="/vid.mp4" /></html>'
+            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
+                if encoding == 'br' and brotli:
+                    payload = brotli.compress(payload)
+                elif encoding == 'gzip':
+                    buf = io.BytesIO()
+                    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+                        f.write(payload)
+                    payload = buf.getvalue()
+                elif encoding == 'deflate':
+                    payload = zlib.compress(payload)
+                elif encoding == 'unsupported':
+                    payload = b'raw'
+                    break
+                else:
+                    self._status(415)
+                    return
+            self.send_response(200)
+            self.send_header('Content-Encoding', encodings)
+            self.send_header('Content-Length', str(len(payload)))
+            self.end_headers()
+            self.wfile.write(payload)
         else:
-            assert False
+            self._status(404)
+
+    def send_header(self, keyword, value):
+        """
+        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
+        This is against what is defined in RFC 3986, however we need to test we support this
+        since some sites incorrectly do this.
+        """
+        if keyword.lower() == 'connection':
+            return super().send_header(keyword, value)
+
+        if not hasattr(self, '_headers_buffer'):
+            self._headers_buffer = []
+
+        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
 
 
 class FakeLogger:
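The `/content-encoding` route above applies the requested encodings left to right, so a client has to decode in reverse; a quick standalone round trip illustrating that (no server involved):

import gzip, io, zlib

payload = b'<html><video src="/vid.mp4" /></html>'

# Server side, as in the handler above, for 'ytdl-encoding: gzip, deflate'
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as f:
    f.write(payload)
encoded = zlib.compress(buf.getvalue())

# Client side: undo the encodings in reverse order (deflate first, then gzip)
decoded = gzip.GzipFile(fileobj=io.BytesIO(zlib.decompress(encoded))).read()
assert decoded == payload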
@@ -56,36 +206,177 @@ def error(self, msg):
 
 class TestHTTP(unittest.TestCase):
     def setUp(self):
-        self.httpd = http.server.HTTPServer(
+        # HTTP server
+        self.http_httpd = http.server.ThreadingHTTPServer(
             ('127.0.0.1', 0), HTTPTestRequestHandler)
-        self.port = http_server_port(self.httpd)
-        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
-        self.server_thread.daemon = True
-        self.server_thread.start()
+        self.http_port = http_server_port(self.http_httpd)
+        self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
+        # FIXME: we should probably stop the http server thread after each test
+        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
+        self.http_server_thread.daemon = True
+        self.http_server_thread.start()
 
-
-class TestHTTPS(unittest.TestCase):
-    def setUp(self):
+        # HTTPS server
         certfn = os.path.join(TEST_DIR, 'testcert.pem')
-        self.httpd = http.server.HTTPServer(
+        self.https_httpd = http.server.ThreadingHTTPServer(
             ('127.0.0.1', 0), HTTPTestRequestHandler)
         sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
         sslctx.load_cert_chain(certfn, None)
-        self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True)
-        self.port = http_server_port(self.httpd)
-        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
-        self.server_thread.daemon = True
-        self.server_thread.start()
+        self.https_httpd.socket = sslctx.wrap_socket(self.https_httpd.socket, server_side=True)
+        self.https_port = http_server_port(self.https_httpd)
+        self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
+        self.https_server_thread.daemon = True
+        self.https_server_thread.start()
 
     def test_nocheckcertificate(self):
-        ydl = YoutubeDL({'logger': FakeLogger()})
-        self.assertRaises(
-            Exception,
-            ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
+        with FakeYDL({'logger': FakeLogger()}) as ydl:
+            with self.assertRaises(urllib.error.URLError):
+                ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers'))
 
-        ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
-        r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
-        self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+        with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
+            r = ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers'))
+            self.assertEqual(r.status, 200)
+            r.close()
+
+    def test_percent_encode(self):
+        with FakeYDL() as ydl:
+            # Unicode characters should be encoded with uppercase percent-encoding
+            res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
+            self.assertEqual(res.status, 200)
+            res.close()
+            # don't normalize existing percent encodings
+            res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
+            self.assertEqual(res.status, 200)
+            res.close()
+
+    def test_unicode_path_redirection(self):
+        with FakeYDL() as ydl:
+            r = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
+            self.assertEqual(r.url, f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html')
+            r.close()
+
+    def test_redirect(self):
+        with FakeYDL() as ydl:
+            def do_req(redirect_status, method):
+                data = b'testdata' if method in ('POST', 'PUT') else None
+                res = ydl.urlopen(sanitized_Request(
+                    f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
+                return res.read().decode('utf-8'), res.headers.get('method', '')
+
+            # A 303 must either use GET or HEAD for subsequent request
+            self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
+            self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
+
+            self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
+
+            # 301 and 302 turn POST only into a GET
+            self.assertEqual(do_req(301, 'POST'), ('', 'GET'))
+            self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
+            self.assertEqual(do_req(302, 'POST'), ('', 'GET'))
+            self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
+
+            self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
+            self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
+
+            # 307 and 308 should not change method
+            for m in ('POST', 'PUT'):
+                self.assertEqual(do_req(307, m), ('testdata', m))
+                self.assertEqual(do_req(308, m), ('testdata', m))
+
+            self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
+            self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
+
+            # These should not redirect and instead raise an HTTPError
+            for code in (300, 304, 305, 306):
+                with self.assertRaises(urllib.error.HTTPError):
+                    do_req(code, 'GET')
+
+    def test_content_type(self):
+        # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
+        with FakeYDL({'nocheckcertificate': True}) as ydl:
+            # method should be auto-detected as POST
+            r = sanitized_Request(f'https://localhost:{self.https_port}/headers', data=urlencode_postdata({'test': 'test'}))
+
+            headers = ydl.urlopen(r).read().decode('utf-8')
+            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+            # test http
+            r = sanitized_Request(f'http://localhost:{self.http_port}/headers', data=urlencode_postdata({'test': 'test'}))
+            headers = ydl.urlopen(r).read().decode('utf-8')
+            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+    def test_cookiejar(self):
+        with FakeYDL() as ydl:
+            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(
+                0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
+                False, '/headers', True, False, None, False, None, None, {}))
+            data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
+            self.assertIn(b'Cookie: test=ytdlp', data)
+
+    def test_no_compression_compat_header(self):
+        with FakeYDL() as ydl:
+            data = ydl.urlopen(
+                sanitized_Request(
+                    f'http://127.0.0.1:{self.http_port}/headers',
+                    headers={'Youtubedl-no-compression': True})).read()
+            self.assertIn(b'Accept-Encoding: identity', data)
+            self.assertNotIn(b'youtubedl-no-compression', data.lower())
+
+    def test_gzip_trailing_garbage(self):
+        # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
+        # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
+        with FakeYDL() as ydl:
+            data = ydl.urlopen(sanitized_Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode('utf-8')
+            self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')
+
+    @unittest.skipUnless(brotli, 'brotli support is not installed')
+    def test_brotli(self):
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    f'http://127.0.0.1:{self.http_port}/content-encoding',
+                    headers={'ytdl-encoding': 'br'}))
+            self.assertEqual(res.headers.get('Content-Encoding'), 'br')
+            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    def test_deflate(self):
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    f'http://127.0.0.1:{self.http_port}/content-encoding',
+                    headers={'ytdl-encoding': 'deflate'}))
+            self.assertEqual(res.headers.get('Content-Encoding'), 'deflate')
+            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    def test_gzip(self):
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    f'http://127.0.0.1:{self.http_port}/content-encoding',
+                    headers={'ytdl-encoding': 'gzip'}))
+            self.assertEqual(res.headers.get('Content-Encoding'), 'gzip')
+            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    def test_multiple_encodings(self):
+        # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
+        with FakeYDL() as ydl:
+            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+                res = ydl.urlopen(
+                    sanitized_Request(
+                        f'http://127.0.0.1:{self.http_port}/content-encoding',
+                        headers={'ytdl-encoding': pair}))
+                self.assertEqual(res.headers.get('Content-Encoding'), pair)
+                self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+    def test_unsupported_encoding(self):
+        # it should return the raw content
+        with FakeYDL() as ydl:
+            res = ydl.urlopen(
+                sanitized_Request(
+                    f'http://127.0.0.1:{self.http_port}/content-encoding',
+                    headers={'ytdl-encoding': 'unsupported'}))
+            self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
+            self.assertEqual(res.read(), b'raw')
 
 
 class TestClientCert(unittest.TestCase):
@@ -112,8 +403,8 @@ def _run_test(self, **params):
             'nocheckcertificate': True,
             **params,
         })
-        r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
-        self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
+        r = ydl.extract_info(f'https://127.0.0.1:{self.port}/video.html')
+        self.assertEqual(r['url'], f'https://127.0.0.1:{self.port}/vid.mp4')
 
     def test_certificate_combined_nopass(self):
         self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt'))
@@ -188,5 +479,22 @@ def test_proxy_with_idn(self):
         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
 
 
+class TestFileURL(unittest.TestCase):
+    # See https://github.com/ytdl-org/youtube-dl/issues/8227
+    def test_file_urls(self):
+        tf = tempfile.NamedTemporaryFile(delete=False)
+        tf.write(b'foobar')
+        tf.close()
+        url = pathlib.Path(tf.name).as_uri()
+        with FakeYDL() as ydl:
+            self.assertRaisesRegex(
+                urllib.error.URLError, 'file:// URLs are explicitly disabled in yt-dlp for security reasons', ydl.urlopen, url)
+        with FakeYDL({'enable_file_urls': True}) as ydl:
+            res = ydl.urlopen(url)
+            self.assertEqual(res.read(), b'foobar')
+            res.close()
+        os.unlink(tf.name)
+
+
 if __name__ == '__main__':
     unittest.main()
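The redirect matrix asserted in `test_redirect` above reduces to a small method-mapping rule; a hedged summary sketch (illustrative only, not the library's actual dispatch):

# Sketch of the redirect/method behaviour the tests assert
def redirected_method(status, method):
    if status == 303 and method != 'HEAD':
        return 'GET'                      # 303: everything but HEAD becomes GET
    if status in (301, 302) and method == 'POST':
        return 'GET'                      # 301/302: only POST is rewritten
    return method                         # 307/308: method is always preserved

assert redirected_method(303, 'PUT') == 'GET'
assert redirected_method(302, 'PUT') == 'PUT'
assert redirected_method(307, 'POST') == 'POST'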
@@ -8,458 +8,330 @@
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import math
-import re
 
 from yt_dlp.jsinterp import JS_Undefined, JSInterpreter
 
 
 class TestJSInterpreter(unittest.TestCase):
+    def _test(self, code, ret, func='f', args=()):
+        self.assertEqual(JSInterpreter(code).call_function(func, *args), ret)
+
     def test_basic(self):
-        jsi = JSInterpreter('function x(){;}')
-        self.assertEqual(jsi.call_function('x'), None)
-
-        jsi = JSInterpreter('function x3(){return 42;}')
-        self.assertEqual(jsi.call_function('x3'), 42)
-
-        jsi = JSInterpreter('function x3(){42}')
-        self.assertEqual(jsi.call_function('x3'), None)
-
-        jsi = JSInterpreter('var x5 = function(){return 42;}')
-        self.assertEqual(jsi.call_function('x5'), 42)
-
-    def test_calc(self):
-        jsi = JSInterpreter('function x4(a){return 2*a+1;}')
-        self.assertEqual(jsi.call_function('x4', 3), 7)
-
-    def test_empty_return(self):
-        jsi = JSInterpreter('function f(){return; y()}')
+        jsi = JSInterpreter('function f(){;}')
+        self.assertEqual(repr(jsi.extract_function('f')), 'F<f>')
         self.assertEqual(jsi.call_function('f'), None)
 
-    def test_morespace(self):
-        jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
-        self.assertEqual(jsi.call_function('x', 3), 7)
+        self._test('function f(){return 42;}', 42)
+        self._test('function f(){42}', None)
+        self._test('var f = function(){return 42;}', 42)
 
-        jsi = JSInterpreter('function f () { x = 2 ; return x; }')
-        self.assertEqual(jsi.call_function('f'), 2)
+    def test_calc(self):
+        self._test('function f(a){return 2*a+1;}', 7, args=[3])
+
+    def test_empty_return(self):
+        self._test('function f(){return; y()}', None)
+
+    def test_morespace(self):
+        self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3])
+        self._test('function f () { x = 2 ; return x; }', 2)
 
     def test_strange_chars(self):
-        jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
-        self.assertEqual(jsi.call_function('$_xY1', 20), 21)
+        self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }',
+                   21, args=[20], func='$_xY1')
 
     def test_operators(self):
-        jsi = JSInterpreter('function f(){return 1 << 5;}')
-        self.assertEqual(jsi.call_function('f'), 32)
-
-        jsi = JSInterpreter('function f(){return 2 ** 5}')
-        self.assertEqual(jsi.call_function('f'), 32)
-
-        jsi = JSInterpreter('function f(){return 19 & 21;}')
-        self.assertEqual(jsi.call_function('f'), 17)
-
-        jsi = JSInterpreter('function f(){return 11 >> 2;}')
-        self.assertEqual(jsi.call_function('f'), 2)
-
-        jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
-        self.assertEqual(jsi.call_function('f'), 5)
-
-        jsi = JSInterpreter('function f(){return 1 == 2}')
-        self.assertEqual(jsi.call_function('f'), False)
-
-        jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
-        self.assertEqual(jsi.call_function('f'), 2)
-
-        jsi = JSInterpreter('function f(){return 0 ?? 42;}')
-        self.assertEqual(jsi.call_function('f'), 0)
-
-        jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
-        self.assertFalse(jsi.call_function('f'))
+        self._test('function f(){return 1 << 5;}', 32)
+        self._test('function f(){return 2 ** 5}', 32)
+        self._test('function f(){return 19 & 21;}', 17)
+        self._test('function f(){return 11 >> 2;}', 2)
+        self._test('function f(){return []? 2+3: 4;}', 5)
+        self._test('function f(){return 1 == 2}', False)
+        self._test('function f(){return 0 && 1 || 2;}', 2)
+        self._test('function f(){return 0 ?? 42;}', 0)
+        self._test('function f(){return "life, the universe and everything" < 42;}', False)
 
     def test_array_access(self):
-        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
-        self.assertEqual(jsi.call_function('f'), [5, 2, 7])
+        self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
 
     def test_parens(self):
-        jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
-        self.assertEqual(jsi.call_function('f'), 7)
-
-        jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
-        self.assertEqual(jsi.call_function('f'), 9)
+        self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7)
+        self._test('function f(){return (1 + 2) * 3;}', 9)
 
     def test_quotes(self):
-        jsi = JSInterpreter(R'function f(){return "a\"\\("}')
-        self.assertEqual(jsi.call_function('f'), R'a"\(')
+        self._test(R'function f(){return "a\"\\("}', R'a"\(')
 
     def test_assignments(self):
-        jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
-        self.assertEqual(jsi.call_function('f'), 31)
-
-        jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
-        self.assertEqual(jsi.call_function('f'), 51)
-
-        jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
-        self.assertEqual(jsi.call_function('f'), -11)
+        self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
+        self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
+        self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
 
+    @unittest.skip('Not implemented')
     def test_comments(self):
-        'Skipping: Not yet fully implemented'
-        return
-        jsi = JSInterpreter('''
-            function x() {
+        self._test('''
+            function f() {
                 var x = /* 1 + */ 2;
                 var y = /* 30
                 * 40 */ 50;
                 return x + y;
-            }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 52)
+            }
+        ''', 52)
 
-        jsi = JSInterpreter('''
+        self._test('''
             function f() {
                 var x = "/*";
                 var y = 1 /* comment */ + 2;
                 return y;
             }
-        ''')
-        self.assertEqual(jsi.call_function('f'), 3)
+        ''', 3)
 
     def test_precedence(self):
-        jsi = JSInterpreter('''
-            function x() {
+        self._test('''
+            function f() {
                 var a = [10, 20, 30, 40, 50];
                 var b = 6;
                 a[0]=a[b%a.length];
                 return a;
-            }''')
-        self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
+            }
+        ''', [20, 20, 30, 40, 50])
 
     def test_builtins(self):
-        jsi = JSInterpreter('''
-            function x() { return NaN }
-        ''')
-        self.assertTrue(math.isnan(jsi.call_function('x')))
+        jsi = JSInterpreter('function f() { return NaN }')
+        self.assertTrue(math.isnan(jsi.call_function('f')))
 
-        jsi = JSInterpreter('''
-            function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 86000)
-        jsi = JSInterpreter('''
-            function x(dt) { return new Date(dt) - 0; }
-        ''')
-        self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
+    def test_date(self):
+        self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)
+
+        jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }')
+        self.assertEqual(jsi.call_function('f', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
+        self.assertEqual(jsi.call_function('f', '12/31/1969 18:01:26 MDT'), 86000)  # m/d/y
+        self.assertEqual(jsi.call_function('f', '1 January 1970 00:00:00 UTC'), 0)
 
     def test_call(self):
         jsi = JSInterpreter('''
             function x() { return 2; }
             function y(a) { return x() + (a?a:0); }
             function z() { return y(3); }
         ''')
         self.assertEqual(jsi.call_function('z'), 5)
         self.assertEqual(jsi.call_function('y'), 2)
 
     def test_if(self):
-        jsi = JSInterpreter('''
-            function x() {
+        self._test('''
+            function f() {
                 let a = 9;
                 if (0==0) {a++}
                 return a
-            }''')
-        self.assertEqual(jsi.call_function('x'), 10)
+            }
+        ''', 10)
 
-        jsi = JSInterpreter('''
-            function x() {
+        self._test('''
+            function f() {
                 if (0==0) {return 10}
-            }''')
-        self.assertEqual(jsi.call_function('x'), 10)
+            }
+        ''', 10)
 
-        jsi = JSInterpreter('''
-            function x() {
+        self._test('''
+            function f() {
                 if (0!=0) {return 1}
                 else {return 10}
-            }''')
-        self.assertEqual(jsi.call_function('x'), 10)
+            }
+        ''', 10)
 
         """ # Unsupported
-        jsi = JSInterpreter('''
-            function x() {
+        self._test('''
+            function f() {
                 if (0!=0) {return 1}
                 else if (1==0) {return 2}
                 else {return 10}
-            }''')
-        self.assertEqual(jsi.call_function('x'), 10)
+            }
+        ''', 10)
         """
 
     def test_for_loop(self):
-        jsi = JSInterpreter('''
-            function x() { a=0; for (i=0; i-10; i++) {a++} return a }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 10)
+        self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10)
 
     def test_switch(self):
         jsi = JSInterpreter('''
-            function x(f) { switch(f){
-                case 1:f+=1;
-                case 2:f+=2;
-                case 3:f+=3;break;
-                case 4:f+=4;
-                default:f=0;
-            } return f }
+            function f(x) { switch(x){
+                case 1:x+=1;
+                case 2:x+=2;
+                case 3:x+=3;break;
+                case 4:x+=4;
+                default:x=0;
+            } return x }
         ''')
-        self.assertEqual(jsi.call_function('x', 1), 7)
-        self.assertEqual(jsi.call_function('x', 3), 6)
-        self.assertEqual(jsi.call_function('x', 5), 0)
+        self.assertEqual(jsi.call_function('f', 1), 7)
+        self.assertEqual(jsi.call_function('f', 3), 6)
+        self.assertEqual(jsi.call_function('f', 5), 0)
 
     def test_switch_default(self):
         jsi = JSInterpreter('''
-            function x(f) { switch(f){
-                case 2: f+=2;
-                default: f-=1;
+            function f(x) { switch(x){
+                case 2: x+=2;
+                default: x-=1;
                 case 5:
-                case 6: f+=6;
+                case 6: x+=6;
                 case 0: break;
-                case 1: f+=1;
-            } return f }
+                case 1: x+=1;
+            } return x }
         ''')
-        self.assertEqual(jsi.call_function('x', 1), 2)
-        self.assertEqual(jsi.call_function('x', 5), 11)
-        self.assertEqual(jsi.call_function('x', 9), 14)
+        self.assertEqual(jsi.call_function('f', 1), 2)
+        self.assertEqual(jsi.call_function('f', 5), 11)
+        self.assertEqual(jsi.call_function('f', 9), 14)
 
     def test_try(self):
-        jsi = JSInterpreter('''
-            function x() { try{return 10} catch(e){return 5} }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 10)
+        self._test('function f() { try{return 10} catch(e){return 5} }', 10)
 
     def test_catch(self):
-        jsi = JSInterpreter('''
-            function x() { try{throw 10} catch(e){return 5} }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 5)
+        self._test('function f() { try{throw 10} catch(e){return 5} }', 5)
 
     def test_finally(self):
-        jsi = JSInterpreter('''
-            function x() { try{throw 10} finally {return 42} }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 42)
-        jsi = JSInterpreter('''
-            function x() { try{throw 10} catch(e){return 5} finally {return 42} }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 42)
+        self._test('function f() { try{throw 10} finally {return 42} }', 42)
+        self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42)
 
     def test_nested_try(self):
-        jsi = JSInterpreter('''
-            function x() {try {
-                try{throw 10} finally {throw 42}
-                } catch(e){return 5} }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 5)
+        self._test('''
+            function f() {try {
+                try{throw 10} finally {throw 42}
+                } catch(e){return 5} }
+        ''', 5)
 
     def test_for_loop_continue(self):
-        jsi = JSInterpreter('''
-            function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 0)
+        self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0)
 
     def test_for_loop_break(self):
-        jsi = JSInterpreter('''
-            function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 0)
+        self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0)
 
     def test_for_loop_try(self):
-        jsi = JSInterpreter('''
-            function x() {
-                for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
-                return 42 }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 42)
+        self._test('''
+            function f() {
+                for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
+                return 42 }
+        ''', 42)
 
     def test_literal_list(self):
-        jsi = JSInterpreter('''
-            function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
-        ''')
-        self.assertEqual(jsi.call_function('x'), [5, 6, 7])
+        self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7])
 
     def test_comma(self):
-        jsi = JSInterpreter('''
-            function x() { a=5; a -= 1, a+=3; return a }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 7)
-
-        jsi = JSInterpreter('''
-            function x() { a=5; return (a -= 1, a+=3, a); }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 7)
-
-        jsi = JSInterpreter('''
-            function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
-        ''')
-        self.assertEqual(jsi.call_function('x'), 5)
+        self._test('function f() { a=5; a -= 1, a+=3; return a }', 7)
+        self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
+        self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
 
     def test_void(self):
-        jsi = JSInterpreter('''
-            function x() { return void 42; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), None)
+        self._test('function f() { return void 42; }', None)
 
     def test_return_function(self):
         jsi = JSInterpreter('''
-            function x() { return [1, function(){return 1}][1] }
+            function f() { return [1, function(){return 1}][1] }
         ''')
-        self.assertEqual(jsi.call_function('x')([]), 1)
+        self.assertEqual(jsi.call_function('f')([]), 1)
 
     def test_null(self):
-        jsi = JSInterpreter('''
-            function x() { return null; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), None)
-
-        jsi = JSInterpreter('''
-            function x() { return [null > 0, null < 0, null == 0, null === 0]; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), [False, False, False, False])
-
-        jsi = JSInterpreter('''
-            function x() { return [null >= 0, null <= 0]; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), [True, True])
+        self._test('function f() { return null; }', None)
+        self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }',
+                   [False, False, False, False])
+        self._test('function f() { return [null >= 0, null <= 0]; }', [True, True])
 
     def test_undefined(self):
-        jsi = JSInterpreter('''
-            function x() { return undefined === undefined; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), True)
+        self._test('function f() { return undefined === undefined; }', True)
+        self._test('function f() { return undefined; }', JS_Undefined)
+        self._test('function f() {return undefined ?? 42; }', 42)
+        self._test('function f() { let v; return v; }', JS_Undefined)
+        self._test('function f() { let v; return v**0; }', 1)
+        self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
+                   [False, False, JS_Undefined, JS_Undefined])
+
+        self._test('''
+            function f() { return [
+                undefined === undefined,
+                undefined == undefined,
+                undefined == null,
+                undefined < undefined,
+                undefined > undefined,
+                undefined === 0,
+                undefined == 0,
+                undefined < 0,
+                undefined > 0,
+                undefined >= 0,
+                undefined <= 0,
+                undefined > null,
+                undefined < null,
+                undefined === null
+            ]; }
+        ''', list(map(bool, (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))))
 
         jsi = JSInterpreter('''
-            function x() { return undefined; }
+            function f() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
         ''')
-        self.assertEqual(jsi.call_function('x'), JS_Undefined)
-
-        jsi = JSInterpreter('''
-            function x() { let v; return v; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), JS_Undefined)
-
-        jsi = JSInterpreter('''
-            function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), [True, True, False, False])
-
-        jsi = JSInterpreter('''
-            function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), [False, False, False, False])
-
-        jsi = JSInterpreter('''
-            function x() { return [undefined >= 0, undefined <= 0]; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), [False, False])
-
-        jsi = JSInterpreter('''
-            function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
-        ''')
-        self.assertEqual(jsi.call_function('x'), [False, False, True, False])
+        for y in jsi.call_function('f'):
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, True, False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
|
|
||||||
''')
|
|
||||||
for y in jsi.call_function('x'):
|
|
||||||
self.assertTrue(math.isnan(y))
|
self.assertTrue(math.isnan(y))
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return v**0; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 1)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_object(self):
|
def test_object(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return {}; }', {})
|
||||||
function x() { return {}; }
|
self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0])
|
||||||
''')
|
self._test('function f() { let a; return a?.qq; }', JS_Undefined)
|
||||||
self.assertEqual(jsi.call_function('x'), {})
|
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [42, 0])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a; return a?.qq; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
def test_regex(self):
|
def test_regex(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
||||||
function x() { let a=/,,[/,913,/](,)}/; }
|
self._test('function f() { let a=/,,[/,913,/](,)}/; return a; }', R'/,,[/,913,/](,)}/0')
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), None)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
R''' # We are not compiling regex
|
||||||
function x() { let a=/,,[/,913,/](,)}/; return a; }
|
jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/; return a; }')
|
||||||
''')
|
self.assertIsInstance(jsi.call_function('f'), re.Pattern)
|
||||||
self.assertIsInstance(jsi.call_function('x'), re.Pattern)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/i; return a; }')
|
||||||
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
self.assertEqual(jsi.call_function('f').flags & re.I, re.I)
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x').flags & re.I, re.I)
|
|
||||||
|
|
||||||
jsi = JSInterpreter(R'''
|
jsi = JSInterpreter(R'function f() { let a=/,][}",],()}(\[)/; return a; }')
|
||||||
function x() { let a=/,][}",],()}(\[)/; return a; }
|
self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)')
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x').pattern, r',][}",],()}(\[)')
|
|
||||||
|
|
||||||
jsi = JSInterpreter(R'''
|
jsi = JSInterpreter(R'function f() { let a=[/[)\\]/]; return a[0]; }')
|
||||||
function x() { let a=[/[)\\]/]; return a[0]; }
|
self.assertEqual(jsi.call_function('f').pattern, r'[)\\]')
|
||||||
''')
|
'''
|
||||||
self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
|
|
||||||
|
@unittest.skip('Not implemented')
|
||||||
|
def test_replace(self):
|
||||||
|
self._test('function f() { let a="data-name".replace("data-", ""); return a }',
|
||||||
|
'name')
|
||||||
|
self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }',
|
||||||
|
'name')
|
||||||
|
self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }',
|
||||||
|
'name')
|
||||||
|
self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }',
|
||||||
|
'doto-nome')
|
||||||
|
self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }',
|
||||||
|
'doto-nome')
|
||||||
|
|
||||||
def test_char_code_at(self):
|
def test_char_code_at(self):
|
||||||
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}')
|
||||||
self.assertEqual(jsi.call_function('x', 0), 116)
|
self.assertEqual(jsi.call_function('f', 0), 116)
|
||||||
self.assertEqual(jsi.call_function('x', 1), 101)
|
self.assertEqual(jsi.call_function('f', 1), 101)
|
||||||
self.assertEqual(jsi.call_function('x', 2), 115)
|
self.assertEqual(jsi.call_function('f', 2), 115)
|
||||||
self.assertEqual(jsi.call_function('x', 3), 116)
|
self.assertEqual(jsi.call_function('f', 3), 116)
|
||||||
self.assertEqual(jsi.call_function('x', 4), None)
|
self.assertEqual(jsi.call_function('f', 4), None)
|
||||||
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
|
self.assertEqual(jsi.call_function('f', 'not_a_number'), 116)
|
||||||
|
|
||||||
def test_bitwise_operators_overflow(self):
|
def test_bitwise_operators_overflow(self):
|
||||||
jsi = JSInterpreter('function x(){return -524999584 << 5}')
|
self._test('function f(){return -524999584 << 5}', 379882496)
|
||||||
self.assertEqual(jsi.call_function('x'), 379882496)
|
self._test('function f(){return 1236566549 << 5}', 915423904)
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
def test_bitwise_operators_typecast(self):
|
||||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
self._test('function f(){return null << 5}', 0)
|
||||||
|
self._test('function f(){return undefined >> 5}', 0)
|
||||||
|
self._test('function f(){return 42 << NaN}', 42)
|
||||||
|
|
||||||
def test_negative(self):
|
def test_negative(self):
|
||||||
jsi = JSInterpreter("function f(){return 2 * -2.0;}")
|
self._test('function f(){return 2 * -2.0 ;}', -4)
|
||||||
self.assertEqual(jsi.call_function('f'), -4)
|
self._test('function f(){return 2 - - -2 ;}', 0)
|
||||||
|
self._test('function f(){return 2 - - - -2 ;}', 4)
|
||||||
|
self._test('function f(){return 2 - + + - -2;}', 0)
|
||||||
|
self._test('function f(){return 2 + - + - -2;}', 0)
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 2 - - -2;}')
|
@unittest.skip('Not implemented')
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
def test_packed(self):
|
||||||
|
jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
|
||||||
jsi = JSInterpreter('function f(){return 2 - - - -2;}')
|
self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 
9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
|
||||||
self.assertEqual(jsi.call_function('f'), 4)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 2 - + + - -2;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 2 + - + - -2;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
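
The refactor above folds each `JSInterpreter(...)` / `assertEqual(jsi.call_function('x'), ...)` pair in test/test_jsinterp.py into a single `self._test(...)` call, but the helper itself is defined outside this excerpt. A plausible sketch, inferred purely from the call sites — the parameter names and the default function name 'f' are assumptions:

    # Hypothetical reconstruction; the real helper lives elsewhere in test/test_jsinterp.py
    def _test(self, jsi_or_code, expected, func='f', args=()):
        # Accept either raw JS source or a prepared interpreter
        if isinstance(jsi_or_code, str):
            jsi_or_code = JSInterpreter(jsi_or_code)
        # Each call site passes JS defining a function `f` plus the value it must return
        self.assertEqual(jsi_or_code.call_function(func, *args), expected)
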
test/test_utils.py
@@ -5,6 +5,7 @@
 import re
 import sys
 import unittest
+import warnings
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
@@ -112,6 +113,7 @@
     subtitles_filename,
     timeconvert,
     traverse_obj,
+    try_call,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -123,6 +125,7 @@
     urlencode_postdata,
     urljoin,
     urshift,
+    variadic,
     version_tuple,
     xpath_attr,
     xpath_element,
@@ -1979,6 +1982,35 @@ def test_get_compatible_ext(self):
         self.assertEqual(get_compatible_ext(
             vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv')
 
+    def test_try_call(self):
+        def total(*x, **kwargs):
+            return sum(x) + sum(kwargs.values())
+
+        self.assertEqual(try_call(None), None,
+                         msg='not a fn should give None')
+        self.assertEqual(try_call(lambda: 1), 1,
+                         msg='int fn with no expected_type should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
+                         msg='int fn with expected_type int should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with wrong expected_type should give None')
+        self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
+                         msg='fn should accept arglist')
+        self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
+                         msg='fn should accept kwargs')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with no expected_type should give None')
+        self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
+                         msg='expect first int result with expected_type int')
+
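The assertions above pin down `try_call`'s contract: call each candidate in turn, swallow failures, and return the first result that matches `expected_type` when one is given. A minimal sketch consistent with those assertions — the exact exception set that counts as "failure" is an assumption; the real helper lives in yt_dlp/utils.py:

    def try_call(*funcs, expected_type=None, args=[], kwargs={}):
        for f in funcs:
            try:
                val = f(*args, **kwargs)
            except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
                pass  # non-callables (e.g. None) and raising callables are skipped
            else:
                if expected_type is None or isinstance(val, expected_type):
                    return val  # first acceptable result wins; otherwise falls through to None
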
+    def test_variadic(self):
+        self.assertEqual(variadic(None), (None, ))
+        self.assertEqual(variadic('spam'), ('spam', ))
+        self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
+
     def test_traverse_obj(self):
         _TEST_DATA = {
             100: 100,
test/test_youtube_signature.py
@@ -146,6 +146,10 @@
         'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
         'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
     ),
+    (
+        'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
+        'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
+    ),
 ]
yt_dlp/YoutubeDL.py
@@ -13,6 +13,7 @@
 import random
 import re
 import shutil
+import string
 import subprocess
 import sys
 import tempfile
@@ -20,10 +21,9 @@
 import tokenize
 import traceback
 import unicodedata
-import urllib.request
-from string import Formatter, ascii_letters
 
 from .cache import Cache
+from .compat import urllib  # isort: split
 from .compat import compat_os_name, compat_shlex_quote
 from .cookies import load_cookies
 from .downloader import (
@@ -129,7 +129,6 @@
     parse_filesize,
     preferredencoding,
     prepend_extension,
-    register_socks_protocols,
     remove_terminal_sequences,
     render_table,
     replace_extension,
@@ -195,6 +194,7 @@ class YoutubeDL:
     ap_username: Multiple-system operator account username.
     ap_password: Multiple-system operator account password.
     usenetrc: Use netrc for authentication instead.
+    netrc_location: Location of the netrc file. Defaults to ~/.netrc.
     verbose: Print additional info to stdout.
     quiet: Do not print messages to stdout.
     no_warnings: Do not print out anything for warnings.
@@ -285,7 +285,7 @@ class YoutubeDL:
                 subtitles. The language can be prefixed with a "-" to
                 exclude it from the requested languages, e.g. ['all', '-live_chat']
     keepvideo: Keep the video file after post-processing
-    daterange: A DateRange object, download only if the upload_date is in the range.
+    daterange: A utils.DateRange object, download only if the upload_date is in the range.
     skip_download: Skip the actual download of the video file
     cachedir: Location of the cache files in the filesystem.
                 False to disable filesystem cache.
@@ -334,13 +334,13 @@ class YoutubeDL:
                 'auto' for elaborate guessing
     encoding: Use this encoding instead of the system-specified.
     extract_flat: Whether to resolve and process url_results further
-                * False: Always process (default)
+                * False: Always process. Default for API
                 * True: Never process
                 * 'in_playlist': Do not process inside playlist/multi_video
                 * 'discard': Always process, but don't return the result
                   from inside playlist/multi_video
                 * 'discard_in_playlist': Same as "discard", but only for
-                  playlists (not multi_video)
+                  playlists (not multi_video). Default for CLI
     wait_for_video: If given, wait for scheduled streams to become available.
                 The value should be a tuple containing the range
                 (min_secs, max_secs) to wait between retries
@@ -420,7 +420,12 @@ class YoutubeDL:
                 - Raise utils.DownloadCancelled(msg) to abort remaining
                   downloads when a video is rejected.
                 match_filter_func in utils.py is one example for this.
-    no_color: Do not emit color codes in output.
+    color: A Dictionary with output stream names as keys
+                and their respective color policy as values.
+                Can also just be a single color policy,
+                in which case it applies to all outputs.
+                Valid stream names are 'stdout' and 'stderr'.
+                Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
     geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
                 HTTP header
     geo_bypass_country:
@@ -477,7 +482,7 @@ class YoutubeDL:
                 can also be used
 
     The following options are used by the extractors:
-    extractor_retries: Number of times to retry for known errors
+    extractor_retries: Number of times to retry for known errors (default: 3)
     dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
     hls_split_discontinuity: Split HLS playlists to different formats at
                 discontinuities such as ad breaks (default: False)
@@ -542,6 +547,7 @@ class YoutubeDL:
                 data will be downloaded and processed by extractor.
                 You can reduce network I/O by disabling it if you don't
                 care about HLS. (only for youtube)
+    no_color: Same as `color='no_color'`
     """
 
     _NUMERIC_FIELDS = {
@@ -608,9 +614,24 @@ def __init__(self, params=None, auto_init=True):
             except Exception as e:
                 self.write_debug(f'Failed to enable VT mode: {e}')
 
+        if self.params.get('no_color'):
+            if self.params.get('color') is not None:
+                self.report_warning('Overwriting params from "color" with "no_color"')
+            self.params['color'] = 'no_color'
+
+        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
+
+        def process_color_policy(stream):
+            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
+            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
+            if policy in ('auto', None):
+                return term_allow_color and supports_terminal_sequences(stream)
+            assert policy in ('always', 'never', 'no_color')
+            return {'always': True, 'never': False}.get(policy, policy)
+
         self._allow_colors = Namespace(**{
-            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
-            for type_, stream in self._out_files.items_ if type_ != 'console'
+            name: process_color_policy(stream)
+            for name, stream in self._out_files.items_ if name != 'console'
         })
 
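Per the `color` docstring earlier in this file, the new `process_color_policy` resolves a policy per output stream from `params['color']`, which may be a mapping or a single policy string. A hedged usage sketch of the new option (the URL is a placeholder):

    from yt_dlp import YoutubeDL

    # Force colors on stdout but disable them on stderr;
    # a bare string such as 'never' would apply to both streams instead.
    with YoutubeDL({'color': {'stdout': 'always', 'stderr': 'no_color'}}) as ydl:
        ydl.download(['https://example.com/some-video'])
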
         # The code is left like this to be reused for future deprecations
@@ -743,7 +764,6 @@ def check_deprecated(param, option, suggestion):
                 when=when)
 
         self._setup_opener()
-        register_socks_protocols()
 
         def preload_download_archive(fn):
             """Preload the archive, if any is specified"""
@@ -980,7 +1000,7 @@ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_enc
             text = text.encode(encoding, 'ignore').decode(encoding)
             if fallback is not None and text != original_text:
                 text = fallback
-        return format_text(text, f) if allow_colors else text if fallback is None else fallback
+        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
 
     def _format_out(self, *args, **kwargs):
         return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
@@ -1083,7 +1103,7 @@ def _outtmpl_expandpath(outtmpl):
         # correspondingly that is not what we want since we need to keep
         # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
-        sep = ''.join(random.choices(ascii_letters, k=32))
+        sep = ''.join(random.choices(string.ascii_letters, k=32))
         outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
 
         # outtmpl should be expand_path'ed before template dict substitution
@@ -1242,7 +1262,7 @@ def _dumpjson_default(obj):
             return list(obj)
         return repr(obj)
 
-    class _ReplacementFormatter(Formatter):
+    class _ReplacementFormatter(string.Formatter):
         def get_field(self, field_name, args, kwargs):
             if field_name.isdigit():
                 return args[0], -1
@@ -2072,86 +2092,86 @@ def syntax_error(note, start):
 
         def _parse_filter(tokens):
             filter_parts = []
-            for type, string, start, _, _ in tokens:
-                if type == tokenize.OP and string == ']':
+            for type, string_, start, _, _ in tokens:
+                if type == tokenize.OP and string_ == ']':
                     return ''.join(filter_parts)
                 else:
-                    filter_parts.append(string)
+                    filter_parts.append(string_)
 
         def _remove_unused_ops(tokens):
             # Remove operators that we don't use and join them with the surrounding strings.
             # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
             ALLOWED_OPS = ('/', '+', ',', '(', ')')
             last_string, last_start, last_end, last_line = None, None, None, None
-            for type, string, start, end, line in tokens:
-                if type == tokenize.OP and string == '[':
+            for type, string_, start, end, line in tokens:
+                if type == tokenize.OP and string_ == '[':
                     if last_string:
                         yield tokenize.NAME, last_string, last_start, last_end, last_line
                         last_string = None
-                    yield type, string, start, end, line
+                    yield type, string_, start, end, line
                     # everything inside brackets will be handled by _parse_filter
-                    for type, string, start, end, line in tokens:
-                        yield type, string, start, end, line
-                        if type == tokenize.OP and string == ']':
+                    for type, string_, start, end, line in tokens:
+                        yield type, string_, start, end, line
+                        if type == tokenize.OP and string_ == ']':
                             break
-                elif type == tokenize.OP and string in ALLOWED_OPS:
+                elif type == tokenize.OP and string_ in ALLOWED_OPS:
                     if last_string:
                         yield tokenize.NAME, last_string, last_start, last_end, last_line
                         last_string = None
-                    yield type, string, start, end, line
+                    yield type, string_, start, end, line
                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                     if not last_string:
-                        last_string = string
+                        last_string = string_
                         last_start = start
                         last_end = end
                     else:
-                        last_string += string
+                        last_string += string_
             if last_string:
                 yield tokenize.NAME, last_string, last_start, last_end, last_line
 
         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
             selectors = []
             current_selector = None
-            for type, string, start, _, _ in tokens:
+            for type, string_, start, _, _ in tokens:
                 # ENCODING is only defined in python 3.x
                 if type == getattr(tokenize, 'ENCODING', None):
                     continue
                 elif type in [tokenize.NAME, tokenize.NUMBER]:
-                    current_selector = FormatSelector(SINGLE, string, [])
+                    current_selector = FormatSelector(SINGLE, string_, [])
                 elif type == tokenize.OP:
-                    if string == ')':
+                    if string_ == ')':
                         if not inside_group:
                             # ')' will be handled by the parentheses group
                             tokens.restore_last_token()
                         break
-                    elif inside_merge and string in ['/', ',']:
+                    elif inside_merge and string_ in ['/', ',']:
                         tokens.restore_last_token()
                         break
-                    elif inside_choice and string == ',':
+                    elif inside_choice and string_ == ',':
                         tokens.restore_last_token()
                         break
-                    elif string == ',':
+                    elif string_ == ',':
                         if not current_selector:
                             raise syntax_error('"," must follow a format selector', start)
                         selectors.append(current_selector)
                         current_selector = None
-                    elif string == '/':
+                    elif string_ == '/':
                         if not current_selector:
                             raise syntax_error('"/" must follow a format selector', start)
                         first_choice = current_selector
                         second_choice = _parse_format_selection(tokens, inside_choice=True)
                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
-                    elif string == '[':
+                    elif string_ == '[':
                         if not current_selector:
                             current_selector = FormatSelector(SINGLE, 'best', [])
                         format_filter = _parse_filter(tokens)
                         current_selector.filters.append(format_filter)
-                    elif string == '(':
+                    elif string_ == '(':
                         if current_selector:
                             raise syntax_error('Unexpected "("', start)
                         group = _parse_format_selection(tokens, inside_group=True)
                         current_selector = FormatSelector(GROUP, group, [])
-                    elif string == '+':
+                    elif string_ == '+':
                         if not current_selector:
                             raise syntax_error('Unexpected "+"', start)
                         selector_1 = current_selector
@@ -2160,7 +2180,7 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins
                             raise syntax_error('Expected a selector', start)
                         current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                     else:
-                        raise syntax_error(f'Operator not recognized: "{string}"', start)
+                        raise syntax_error(f'Operator not recognized: "{string_}"', start)
                 elif type == tokenize.ENDMARKER:
                     break
             if current_selector:
@@ -2386,8 +2406,10 @@ def restore_last_token(self):
 
     def _calc_headers(self, info_dict):
         res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
-        cookies = self._calc_cookies(info_dict['url'])
+        if 'Youtubedl-No-Compression' in res:  # deprecated
+            res.pop('Youtubedl-No-Compression', None)
+            res['Accept-Encoding'] = 'identity'
+        cookies = self.cookiejar.get_cookie_header(info_dict['url'])
         if cookies:
             res['Cookie'] = cookies
 
@@ -2399,9 +2421,8 @@ def _calc_headers(self, info_dict):
         return res
 
     def _calc_cookies(self, url):
-        pr = sanitized_Request(url)
-        self.cookiejar.add_cookie_header(pr)
-        return pr.get_header('Cookie')
+        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
+        return self.cookiejar.get_cookie_header(url)
 
     def _sort_thumbnails(self, thumbnails):
         thumbnails.sort(key=lambda t: (
@@ -2728,21 +2749,22 @@ def is_wellformed(f):
             return info_dict
 
         format_selector = self.format_selector
-        if format_selector is None:
-            req_format = self._default_format_spec(info_dict, download=download)
-            self.write_debug('Default format spec: %s' % req_format)
-            format_selector = self.build_format_selector(req_format)
 
         while True:
             if interactive_format_selection:
-                req_format = input(
-                    self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
+                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
+                                   + '(Press ENTER for default, or Ctrl+C to quit)'
+                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                 try:
-                    format_selector = self.build_format_selector(req_format)
+                    format_selector = self.build_format_selector(req_format) if req_format else None
                 except SyntaxError as err:
                     self.report_error(err, tb=False, is_error=False)
                     continue
 
+            if format_selector is None:
+                req_format = self._default_format_spec(info_dict, download=download)
+                self.write_debug(f'Default format spec: {req_format}')
+                format_selector = self.build_format_selector(req_format)
+
             formats_to_download = list(format_selector({
                 'formats': formats,
                 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
@@ -2902,7 +2924,7 @@ def format_tmpl(tmpl):
 
             fmt = '%({})s'
             if tmpl.startswith('{'):
-                tmpl = f'.{tmpl}'
+                tmpl, fmt = f'.{tmpl}', '%({})j'
            if tmpl.endswith('='):
                 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
             return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
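The one-line change above routes brace-wrapped `--print` templates through the `j` (JSON) conversion instead of plain `s`. A self-contained re-implementation of just this helper, for illustration — the surrounding `--print` machinery is assumed, and `mobj.group('dict')` is modelled by an `is_dict` flag:

    def format_tmpl(tmpl, *, is_dict=False):
        fmt = '%({})s'
        if tmpl.startswith('{'):
            # '{id,title}' now expands to a single JSON-object template
            tmpl, fmt = f'.{tmpl}', '%({})j'
        if tmpl.endswith('='):
            tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
        return '\n'.join(map(fmt.format, [tmpl] if is_dict else tmpl.split(',')))

    assert format_tmpl('id,title') == '%(id)s\n%(title)s'
    assert format_tmpl('{id,title}', is_dict=True) == '%(.{id,title})j'
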
@@ -2941,7 +2963,8 @@ def print_field(field, actual_field=None, optional=False):
         print_field('url', 'urls')
         print_field('thumbnail', optional=True)
         print_field('description', optional=True)
-        print_field('filename', optional=True)
+        if filename:
+            print_field('filename')
         if self.params.get('forceduration') and info_copy.get('duration') is not None:
             self.to_stdout(formatSeconds(info_copy['duration']))
         print_field('format')
@@ -3422,8 +3445,8 @@ def sanitize_info(info_dict, remove_private_keys=False):
         if remove_private_keys:
             reject = lambda k, v: v is None or k.startswith('__') or k in {
                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
-                'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
-                '_format_sort_fields',
+                'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
+                'playlist_autonumber', '_format_sort_fields',
             }
         else:
             reject = lambda k, v: False
@@ -3492,7 +3515,7 @@ def run_pp(self, pp, infodict):
                 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
         return infodict
 
-    def run_all_pps(self, key, info, *, additional_pps=None, fatal=True):
+    def run_all_pps(self, key, info, *, additional_pps=None):
         if key != 'video':
             self._forceprint(key, info)
         for pp in (additional_pps or []) + self._pps[key]:
@@ -3771,9 +3794,14 @@ def print_debug_header(self):
 
         def get_encoding(stream):
             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
+            additional_info = []
+            if os.environ.get('TERM', '').lower() == 'dumb':
+                additional_info.append('dumb')
             if not supports_terminal_sequences(stream):
                 from .utils import WINDOWS_VT_MODE  # Must be imported locally
-                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
+                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
+            if additional_info:
+                ret = f'{ret} ({",".join(additional_info)})'
             return ret
 
         encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
@@ -3998,7 +4026,7 @@ def _write_subtitles(self, info_dict, filename):
             # that way it will silently go on when used with unsupporting IE
             return ret
         elif not subtitles:
-            self.to_screen('[info] There\'s no subtitles for the requested languages')
+            self.to_screen('[info] There are no subtitles for the requested languages')
             return ret
         sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
         if not sub_filename_base:
@@ -4052,7 +4080,7 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
         if write_all or self.params.get('writethumbnail', False):
             thumbnails = info_dict.get('thumbnails') or []
             if not thumbnails:
-                self.to_screen(f'[info] There\'s no {label} thumbnails to download')
+                self.to_screen(f'[info] There are no {label} thumbnails to download')
                 return ret
             multiple = write_all and len(thumbnails) > 1
yt_dlp/__init__.py
@@ -14,6 +14,7 @@
 import re
 import sys
 import time
+import traceback
 
 from .compat import compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
@@ -451,6 +452,10 @@ def metadataparser_actions(f):
     elif ed and proto == 'default':
         default_downloader = ed.get_basename()
 
+    for policy in opts.color.values():
+        if policy not in ('always', 'auto', 'no_color', 'never'):
+            raise ValueError(f'"{policy}" is not a valid color policy')
+
     warnings, deprecation_warnings = [], []
 
     # Common mistake: -f best
@@ -909,7 +914,7 @@ def parse_options(argv=None):
         'playlist_items': opts.playlist_items,
         'xattr_set_filesize': opts.xattr_set_filesize,
         'match_filter': opts.match_filter,
-        'no_color': opts.no_color,
+        'color': opts.color,
         'ffmpeg_location': opts.ffmpeg_location,
         'hls_prefer_native': opts.hls_prefer_native,
         'hls_use_mpegts': opts.hls_use_mpegts,
@@ -953,14 +958,18 @@ def _real_main(argv=None):
     if opts.rm_cachedir:
         ydl.cache.remove()
 
-    updater = Updater(ydl, opts.update_self if isinstance(opts.update_self, str) else None)
-    if opts.update_self and updater.update() and actual_use:
-        if updater.cmd:
-            return updater.restart()
-        # This code is reachable only for zip variant in py < 3.10
-        # It makes sense to exit here, but the old behavior is to continue
-        ydl.report_warning('Restart yt-dlp to use the updated version')
-        # return 100, 'ERROR: The program must exit for the update to complete'
+    try:
+        updater = Updater(ydl, opts.update_self)
+        if opts.update_self and updater.update() and actual_use:
+            if updater.cmd:
+                return updater.restart()
+            # This code is reachable only for zip variant in py < 3.10
+            # It makes sense to exit here, but the old behavior is to continue
+            ydl.report_warning('Restart yt-dlp to use the updated version')
+            # return 100, 'ERROR: The program must exit for the update to complete'
+    except Exception:
+        traceback.print_exc()
+        ydl._download_retcode = 100
 
     if not actual_use:
         if pre_process:
7 yt_dlp/compat/urllib/__init__.py (new file)
@@ -0,0 +1,7 @@
+# flake8: noqa: F405
+from urllib import *  # noqa: F403
+
+from ..compat_utils import passthrough_module
+
+passthrough_module(__name__, 'urllib')
+del passthrough_module
40 yt_dlp/compat/urllib/request.py (new file)
@@ -0,0 +1,40 @@
+# flake8: noqa: F405
+from urllib.request import *  # noqa: F403
+
+from ..compat_utils import passthrough_module
+
+passthrough_module(__name__, 'urllib.request')
+del passthrough_module
+
+
+from .. import compat_os_name
+
+if compat_os_name == 'nt':
+    # On older python versions, proxies are extracted from Windows registry erroneously. [1]
+    # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
+    # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
+    # it to http on these older python versions to avoid issues
+    # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported.
+    # 1: https://github.com/python/cpython/issues/86793
+    # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698
+    import sys
+    from urllib.request import getproxies_environment, getproxies_registry
+
+    def getproxies_registry_patched():
+        proxies = getproxies_registry()
+        if (
+            sys.version_info >= (3, 10, 5)  # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final
+            or (3, 9, 13) <= sys.version_info < (3, 10)  # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final
+        ):
+            return proxies
+
+        for scheme in ('https', 'ftp'):
+            if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'):
+                proxies[scheme] = 'http' + proxies[scheme][len(scheme):]
+
+        return proxies
+
+    def getproxies():
+        return getproxies_environment() or getproxies_registry_patched()
+
+del compat_os_name
@ -1,7 +1,9 @@
|
||||||
import base64
|
import base64
|
||||||
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import http.cookies
|
import http.cookies
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -11,6 +13,7 @@
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
import urllib.request
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from hashlib import pbkdf2_hmac
|
from hashlib import pbkdf2_hmac
|
||||||
|
@ -29,11 +32,14 @@
|
||||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||||
from .utils import (
|
from .utils import (
|
||||||
Popen,
|
Popen,
|
||||||
YoutubeDLCookieJar,
|
|
||||||
error_to_str,
|
error_to_str,
|
||||||
|
escape_url,
|
||||||
expand_path,
|
expand_path,
|
||||||
is_path_like,
|
is_path_like,
|
||||||
|
sanitize_url,
|
||||||
|
str_or_none,
|
||||||
try_call,
|
try_call,
|
||||||
|
write_string,
|
||||||
)
|
)
|
||||||
|
|
||||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||||
|
@ -347,7 +353,9 @@ class ChromeCookieDecryptor:
|
||||||
Linux:
|
Linux:
|
||||||
- cookies are either v10 or v11
|
- cookies are either v10 or v11
|
||||||
     - v10: AES-CBC encrypted with a fixed key
+        - also attempts empty password if decryption fails
     - v11: AES-CBC encrypted with an OS protected key (keyring)
+        - also attempts empty password if decryption fails
     - v11 keys can be stored in various places depending on the activate desktop environment [2]

 Mac:

@@ -362,7 +370,7 @@ class ChromeCookieDecryptor:

 Sources:
 - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
-    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
+    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
     - KeyStorageLinux::CreateService
 """

@@ -384,6 +392,7 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._v10_key = self.derive_key(b'peanuts')
+       self._empty_key = self.derive_key(b'')
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring

@@ -396,25 +405,36 @@ def _v11_key(self):
    @staticmethod
    def derive_key(password):
        # values from
-       # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
+       # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
+       """
+
+       following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
+       with an empty password. The failure detection is not the same as what chromium uses so the
+       results won't be perfect
+
+       References:
+           - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
+               - a bugfix to try an empty password as a fallback
+       """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
-           return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
+           return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        elif version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
-           return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
+           return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger)

        else:
+           self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
            self._cookie_counts['other'] += 1
            return None

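A minimal standalone sketch of the key derivation and empty-password fallback introduced above, using only the standard library; `decrypt_cbc` is an assumed stand-in for yt-dlp's AES helpers, not the project's actual API:

    import hashlib

    def derive_chrome_linux_key(password):
        # same parameters as os_crypt_linux.cc: PBKDF2-SHA1, salt 'saltysalt',
        # 1 iteration, 16-byte key
        return hashlib.pbkdf2_hmac('sha1', password, b'saltysalt', 1, 16)

    def decrypt_with_fallback(ciphertext, decrypt_cbc):
        # try the keyring-derived key first, then the empty-password key,
        # mirroring the chromium bugfix referenced in the docstring above
        for key in (derive_chrome_linux_key(b'peanuts'), derive_chrome_linux_key(b'')):
            try:
                return decrypt_cbc(ciphertext, key).decode()
            except UnicodeDecodeError:
                continue
        return None
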
@@ -429,7 +449,7 @@ def __init__(self, browser_keyring_name, logger):
    @staticmethod
    def derive_key(password):
        # values from
-       # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
+       # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
@@ -442,12 +462,12 @@ def decrypt(self, encrypted_value):
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

-           return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
+           return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)

        else:
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
-           # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
+           # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value


@@ -467,7 +487,7 @@ def decrypt(self, encrypted_value):
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

-           # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+           # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            # kNonceLength
            nonce_length = 96 // 8
            # boringssl
@@ -484,23 +504,27 @@ def decrypt(self, encrypted_value):
        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
-           # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
+           # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()


 def _extract_safari_cookies(profile, logger):
-    if profile is not None:
-        logger.error('safari does not support profiles')
     if sys.platform != 'darwin':
         raise ValueError(f'unsupported platform: {sys.platform}')

-    cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
-    if not os.path.isfile(cookies_path):
-        logger.debug('Trying secondary cookie location')
-        cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
+    if profile:
+        cookies_path = os.path.expanduser(profile)
         if not os.path.isfile(cookies_path):
-            raise FileNotFoundError('could not find safari cookies database')
+            raise FileNotFoundError('custom safari cookies database not found')

+    else:
+        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
+
+        if not os.path.isfile(cookies_path):
+            logger.debug('Trying secondary cookie location')
+            cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
+            if not os.path.isfile(cookies_path):
+                raise FileNotFoundError('could not find safari cookies database')

     with open(cookies_path, 'rb') as f:
         cookies_data = f.read()

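The path resolution above, condensed into a self-contained sketch (the function name is illustrative, not yt-dlp API); an explicit profile path now overrides the default locations instead of being rejected:

    import os

    def safari_cookies_path(profile=None):
        # explicit profile overrides; otherwise try the classic location,
        # then the sandboxed Safari container
        if profile:
            path = os.path.expanduser(profile)
            if not os.path.isfile(path):
                raise FileNotFoundError('custom safari cookies database not found')
            return path
        path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(path):
            path = os.path.expanduser(
                '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(path):
            raise FileNotFoundError('could not find safari cookies database')
        return path
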
@@ -663,27 +687,35 @@ class _LinuxDesktopEnvironment(Enum):
     """
     OTHER = auto()
     CINNAMON = auto()
+    DEEPIN = auto()
     GNOME = auto()
-    KDE = auto()
+    KDE3 = auto()
+    KDE4 = auto()
+    KDE5 = auto()
+    KDE6 = auto()
     PANTHEON = auto()
+    UKUI = auto()
     UNITY = auto()
     XFCE = auto()
+    LXQT = auto()


 class _LinuxKeyring(Enum):
     """
-    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
+    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
     SelectedLinuxBackend
     """
-    KWALLET = auto()
-    GNOMEKEYRING = auto()
-    BASICTEXT = auto()
+    KWALLET4 = auto()  # this value is just called KWALLET in the chromium source but it is for KDE4 only
+    KWALLET5 = auto()
+    KWALLET6 = auto()
+    GNOME_KEYRING = auto()
+    BASIC_TEXT = auto()


 SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()


-def _get_linux_desktop_environment(env):
+def _get_linux_desktop_environment(env, logger):
     """
     https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
     GetDesktopEnvironment
@@ -698,51 +730,97 @@ def _get_linux_desktop_environment(env):
                return _LinuxDesktopEnvironment.GNOME
            else:
                return _LinuxDesktopEnvironment.UNITY
+       elif xdg_current_desktop == 'Deepin':
+           return _LinuxDesktopEnvironment.DEEPIN
        elif xdg_current_desktop == 'GNOME':
            return _LinuxDesktopEnvironment.GNOME
        elif xdg_current_desktop == 'X-Cinnamon':
            return _LinuxDesktopEnvironment.CINNAMON
        elif xdg_current_desktop == 'KDE':
-           return _LinuxDesktopEnvironment.KDE
+           kde_version = env.get('KDE_SESSION_VERSION', None)
+           if kde_version == '5':
+               return _LinuxDesktopEnvironment.KDE5
+           elif kde_version == '6':
+               return _LinuxDesktopEnvironment.KDE6
+           elif kde_version == '4':
+               return _LinuxDesktopEnvironment.KDE4
+           else:
+               logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
+               return _LinuxDesktopEnvironment.KDE4
        elif xdg_current_desktop == 'Pantheon':
            return _LinuxDesktopEnvironment.PANTHEON
        elif xdg_current_desktop == 'XFCE':
            return _LinuxDesktopEnvironment.XFCE
+       elif xdg_current_desktop == 'UKUI':
+           return _LinuxDesktopEnvironment.UKUI
+       elif xdg_current_desktop == 'LXQt':
+           return _LinuxDesktopEnvironment.LXQT
+       else:
+           logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    elif desktop_session is not None:
-       if desktop_session in ('mate', 'gnome'):
+       if desktop_session == 'deepin':
+           return _LinuxDesktopEnvironment.DEEPIN
+       elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
-       elif 'kde' in desktop_session:
-           return _LinuxDesktopEnvironment.KDE
-       elif 'xfce' in desktop_session:
+       elif desktop_session in ('kde4', 'kde-plasma'):
+           return _LinuxDesktopEnvironment.KDE4
+       elif desktop_session == 'kde':
+           if 'KDE_SESSION_VERSION' in env:
+               return _LinuxDesktopEnvironment.KDE4
+           else:
+               return _LinuxDesktopEnvironment.KDE3
+       elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
+       elif desktop_session == 'ukui':
+           return _LinuxDesktopEnvironment.UKUI
+       else:
+           logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    else:
        if 'GNOME_DESKTOP_SESSION_ID' in env:
            return _LinuxDesktopEnvironment.GNOME
        elif 'KDE_FULL_SESSION' in env:
-           return _LinuxDesktopEnvironment.KDE
+           if 'KDE_SESSION_VERSION' in env:
+               return _LinuxDesktopEnvironment.KDE4
+           else:
+               return _LinuxDesktopEnvironment.KDE3
    return _LinuxDesktopEnvironment.OTHER

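A condensed sketch of the KDE disambiguation added above (helper name is illustrative): XDG_CURRENT_DESKTOP reports plain 'KDE' for KDE 4, 5 and 6 alike, so KDE_SESSION_VERSION is what tells them apart, with an unset value treated as KDE4:

    import os

    def kde_flavour(env=os.environ):
        if 'KDE' in env.get('XDG_CURRENT_DESKTOP', ''):
            return {'5': 'KDE5', '6': 'KDE6'}.get(env.get('KDE_SESSION_VERSION'), 'KDE4')
        return None
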
 def _choose_linux_keyring(logger):
     """
-    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
-    SelectBackend
+    SelectBackend in [1]
+
+    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
+    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
+    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
+    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.
+
+    References:
+        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
     """
-    desktop_environment = _get_linux_desktop_environment(os.environ)
+    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
     logger.debug(f'detected desktop environment: {desktop_environment.name}')
-    if desktop_environment == _LinuxDesktopEnvironment.KDE:
-        linux_keyring = _LinuxKeyring.KWALLET
-    elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
-        linux_keyring = _LinuxKeyring.BASICTEXT
+    if desktop_environment == _LinuxDesktopEnvironment.KDE4:
+        linux_keyring = _LinuxKeyring.KWALLET4
+    elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
+        linux_keyring = _LinuxKeyring.KWALLET5
+    elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
+        linux_keyring = _LinuxKeyring.KWALLET6
+    elif desktop_environment in (
+        _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
+    ):
+        linux_keyring = _LinuxKeyring.BASIC_TEXT
     else:
-        linux_keyring = _LinuxKeyring.GNOMEKEYRING
+        linux_keyring = _LinuxKeyring.GNOME_KEYRING
     return linux_keyring

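The resulting desktop-to-keyring mapping, written out as a table for reference (a sketch of the branch logic above, not code from the commit); KDE3 and LXQt have no secret service chromium can use, so they fall back to the unencrypted BASIC_TEXT store:

    KEYRING_FOR_DESKTOP = {
        'KDE4': 'KWALLET4',
        'KDE5': 'KWALLET5',
        'KDE6': 'KWALLET6',
        'KDE3': 'BASIC_TEXT',
        'LXQT': 'BASIC_TEXT',
        'OTHER': 'BASIC_TEXT',
    }

    def choose_keyring(desktop_environment):
        # everything else (GNOME, Cinnamon, Unity, ...) uses GNOME Keyring
        return KEYRING_FOR_DESKTOP.get(desktop_environment, 'GNOME_KEYRING')
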
-def _get_kwallet_network_wallet(logger):
+def _get_kwallet_network_wallet(keyring, logger):
     """ The name of the wallet used to store network passwords.

-    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
+    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
     KWalletDBus::NetworkWallet
     which does a dbus call to the following function:
     https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
@@ -750,10 +828,22 @@ def _get_kwallet_network_wallet(logger):
     """
     default_wallet = 'kdewallet'
     try:
+        if keyring == _LinuxKeyring.KWALLET4:
+            service_name = 'org.kde.kwalletd'
+            wallet_path = '/modules/kwalletd'
+        elif keyring == _LinuxKeyring.KWALLET5:
+            service_name = 'org.kde.kwalletd5'
+            wallet_path = '/modules/kwalletd5'
+        elif keyring == _LinuxKeyring.KWALLET6:
+            service_name = 'org.kde.kwalletd6'
+            wallet_path = '/modules/kwalletd6'
+        else:
+            raise ValueError(keyring)
+
         stdout, _, returncode = Popen.run([
             'dbus-send', '--session', '--print-reply=literal',
-            '--dest=org.kde.kwalletd5',
-            '/modules/kwalletd5',
+            f'--dest={service_name}',
+            wallet_path,
             'org.kde.KWallet.networkWallet'
         ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

@@ -768,8 +858,8 @@ def _get_kwallet_network_wallet(logger):
         return default_wallet

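Equivalent standalone call using subprocess directly (a sketch; yt-dlp's Popen.run wrapper is replaced with the standard library here). Each KWallet generation registers its own D-Bus service, which is why the service name and object path above are now parameterised:

    import subprocess

    def kwallet_network_wallet(service_name, wallet_path, default='kdewallet'):
        # e.g. service_name='org.kde.kwalletd5', wallet_path='/modules/kwalletd5' for KDE5
        proc = subprocess.run([
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}', wallet_path,
            'org.kde.KWallet.networkWallet',
        ], capture_output=True, text=True)
        return proc.stdout.strip() if proc.returncode == 0 else default
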
-def _get_kwallet_password(browser_keyring_name, logger):
-    logger.debug('using kwallet-query to obtain password from kwallet')
+def _get_kwallet_password(browser_keyring_name, keyring, logger):
+    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

     if shutil.which('kwallet-query') is None:
         logger.error('kwallet-query command not found. KWallet and kwallet-query '
@@ -777,7 +867,7 @@ def _get_kwallet_password(browser_keyring_name, logger):
                      'included in the kwallet package for your distribution')
         return b''

-    network_wallet = _get_kwallet_network_wallet(logger)
+    network_wallet = _get_kwallet_network_wallet(keyring, logger)

     try:
         stdout, _, returncode = Popen.run([
@@ -799,8 +889,9 @@ def _get_kwallet_password(browser_keyring_name, logger):
             # checks hasEntry. To verify this:
             # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
             # while starting chrome.
-            # this may be a bug as the intended behaviour is to generate a random password and store
-            # it, but that doesn't matter here.
+            # this was identified as a bug later and fixed in
+            # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
+            # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
             return b''
         else:
             logger.debug('password found')
@@ -838,11 +929,11 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
     keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
     logger.debug(f'Chosen keyring: {keyring.name}')

-    if keyring == _LinuxKeyring.KWALLET:
-        return _get_kwallet_password(browser_keyring_name, logger)
-    elif keyring == _LinuxKeyring.GNOMEKEYRING:
+    if keyring in (_LinuxKeyring.KWALLET4, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
+        return _get_kwallet_password(browser_keyring_name, keyring, logger)
+    elif keyring == _LinuxKeyring.GNOME_KEYRING:
         return _get_gnome_keyring_password(browser_keyring_name, logger)
-    elif keyring == _LinuxKeyring.BASICTEXT:
+    elif keyring == _LinuxKeyring.BASIC_TEXT:
         # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
         return None
     assert False, f'Unknown keyring {keyring}'
@@ -867,6 +958,10 @@ def _get_mac_keyring_password(browser_keyring_name, logger):


 def _get_windows_v10_key(browser_root, logger):
+    """
+    References:
+        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
+    """
     path = _find_most_recently_used_file(browser_root, 'Local State', logger)
     if path is None:
         logger.error('could not find local state file')
@@ -875,11 +970,13 @@ def _get_windows_v10_key(browser_root, logger):
     with open(path, encoding='utf8') as f:
         data = json.load(f)
     try:
+        # kOsCryptEncryptedKeyPrefName in [1]
         base64_key = data['os_crypt']['encrypted_key']
     except KeyError:
         logger.error('no encrypted key in Local State')
         return None
     encrypted_key = base64.b64decode(base64_key)
+    # kDPAPIKeyPrefix in [1]
     prefix = b'DPAPI'
     if not encrypted_key.startswith(prefix):
         logger.error('invalid key')
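For reference, the Local State lookup above boiled down to a self-contained sketch (function name is illustrative): the key is base64 in the JSON, carries a literal 'DPAPI' prefix, and the remainder still needs CryptUnprotectData to decrypt:

    import base64
    import json

    def read_windows_encrypted_key(local_state_path):
        with open(local_state_path, encoding='utf8') as f:
            data = json.load(f)
        encrypted_key = base64.b64decode(data['os_crypt']['encrypted_key'])
        assert encrypted_key.startswith(b'DPAPI'), 'invalid key'
        return encrypted_key[len(b'DPAPI'):]
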
@@ -891,13 +988,15 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
     return pbkdf2_hmac('sha1', password, salt, iterations, key_length)


-def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
-    plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
-    try:
-        return plaintext.decode()
-    except UnicodeDecodeError:
-        logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
-        return None
+def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
+    for key in keys:
+        plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
+        try:
+            return plaintext.decode()
+        except UnicodeDecodeError:
+            pass
+    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
+    return None


 def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
@@ -1091,3 +1190,139 @@ def load(self, data):

         else:
             morsel = None
+
+
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
+    """
+    See [1] for cookie file format.
+
+    1. https://curl.haxx.se/docs/http-cookies.html
+    """
+    _HTTPONLY_PREFIX = '#HttpOnly_'
+    _ENTRY_LEN = 7
+    _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by yt-dlp. Do not edit.
+
+'''
+    _CookieFileEntry = collections.namedtuple(
+        'CookieFileEntry',
+        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+    def __init__(self, filename=None, *args, **kwargs):
+        super().__init__(None, *args, **kwargs)
+        if is_path_like(filename):
+            filename = os.fspath(filename)
+        self.filename = filename
+
+    @staticmethod
+    def _true_or_false(cndn):
+        return 'TRUE' if cndn else 'FALSE'
+
+    @contextlib.contextmanager
+    def open(self, file, *, write=False):
+        if is_path_like(file):
+            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
+                yield f
+        else:
+            if write:
+                file.truncate(0)
+            yield file
+
+    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
+        now = time.time()
+        for cookie in self:
+            if (not ignore_discard and cookie.discard
+                    or not ignore_expires and cookie.is_expired(now)):
+                continue
+            name, value = cookie.name, cookie.value
+            if value is None:
+                # cookies.txt regards 'Set-Cookie: foo' as a cookie
+                # with no name, whereas http.cookiejar regards it as a
+                # cookie with no value.
+                name, value = '', name
+            f.write('%s\n' % '\t'.join((
+                cookie.domain,
+                self._true_or_false(cookie.domain.startswith('.')),
+                cookie.path,
+                self._true_or_false(cookie.secure),
+                str_or_none(cookie.expires, default=''),
+                name, value
+            )))
+
+    def save(self, filename=None, *args, **kwargs):
+        """
+        Save cookies to a file.
+        Code is taken from CPython 3.6
+        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
+
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+        # Store session cookies with `expires` set to 0 instead of an empty string
+        for cookie in self:
+            if cookie.expires is None:
+                cookie.expires = 0
+
+        with self.open(filename, write=True) as f:
+            f.write(self._HEADER)
+            self._really_save(f, *args, **kwargs)
+
+    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+        """Load cookies from a file."""
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+        def prepare_line(line):
+            if line.startswith(self._HTTPONLY_PREFIX):
+                line = line[len(self._HTTPONLY_PREFIX):]
+            # comments and empty lines are fine
+            if line.startswith('#') or not line.strip():
+                return line
+            cookie_list = line.split('\t')
+            if len(cookie_list) != self._ENTRY_LEN:
+                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
+            cookie = self._CookieFileEntry(*cookie_list)
+            if cookie.expires_at and not cookie.expires_at.isdigit():
+                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+            return line
+
+        cf = io.StringIO()
+        with self.open(filename) as f:
+            for line in f:
+                try:
+                    cf.write(prepare_line(line))
+                except http.cookiejar.LoadError as e:
+                    if f'{line.strip()} '[0] in '[{"':
+                        raise http.cookiejar.LoadError(
+                            'Cookies file must be Netscape formatted, not JSON. See '
+                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
+                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
+                    continue
+            cf.seek(0)
+            self._really_load(cf, filename, ignore_discard, ignore_expires)
+        # Session cookies are denoted by either `expires` field set to
+        # an empty string or 0. MozillaCookieJar only recognizes the former
+        # (see [1]). So we need force the latter to be recognized as session
+        # cookies on our own.
+        # Session cookies may be important for cookies-based authentication,
+        # e.g. usually, when user does not check 'Remember me' check box while
+        # logging in on a site, some important cookies are stored as session
+        # cookies so that not recognizing them will result in failed login.
+        # 1. https://bugs.python.org/issue17164
+        for cookie in self:
+            # Treat `expires=0` cookies as session cookies
+            if cookie.expires == 0:
+                cookie.expires = None
+                cookie.discard = True
+
+    def get_cookie_header(self, url):
+        """Generate a Cookie HTTP header for a given url"""
+        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
+        self.add_cookie_header(cookie_req)
+        return cookie_req.get_header('Cookie')
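A quick illustration of the Netscape cookies.txt line format the class above reads and writes (sample values are made up):

    # seven tab-separated fields per cookie, in this exact order:
    # domain, include_subdomains, path, https_only, expires_at, name, value
    line = '\t'.join(('.example.com', 'TRUE', '/', 'FALSE', '0', 'session_id', 'abc123'))
    assert len(line.split('\t')) == 7   # _ENTRY_LEN above; expires_at of 0 marks a session cookie
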
@@ -30,7 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
 from .http import HttpFD
 from .ism import IsmFD
 from .mhtml import MhtmlFD
-from .niconico import NiconicoDmcFD
+from .niconico import NiconicoDmcFD, NiconicoLiveFD
 from .rtmp import RtmpFD
 from .rtsp import RtspFD
 from .websocket import WebSocketFragmentFD
@@ -50,6 +50,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
     'ism': IsmFD,
     'mhtml': MhtmlFD,
     'niconico_dmc': NiconicoDmcFD,
+    'niconico_live': NiconicoLiveFD,
     'fc2_live': FC2LiveFD,
     'websocket_frag': WebSocketFragmentFD,
     'youtube_live_chat': YoutubeLiveChatFD,
@@ -51,8 +51,9 @@ class FileDownloader:
     ratelimit:          Download speed limit, in bytes/sec.
     continuedl:         Attempt to continue downloads if possible
     throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
-    retries:            Number of times to retry for HTTP error 5xx
-    file_access_retries:   Number of times to retry on file access error
+    retries:            Number of times to retry for expected network errors.
+                        Default is 0 for API, but 10 for CLI
+    file_access_retries:   Number of times to retry on file access error (default: 3)
     buffersize:         Size of download buffer in bytes.
     noresizebuffer:     Do not automatically resize the download buffer.
     continuedl:         Try to continue downloads if possible.
@@ -138,17 +139,21 @@ def calc_percent(byte_counter, data_len):
    def format_percent(percent):
        return '  N/A%' if percent is None else f'{percent:>5.1f}%'

-   @staticmethod
-   def calc_eta(start, now, total, current):
+   @classmethod
+   def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
+       if total is NO_DEFAULT:
+           rate, remaining = start_or_rate, now_or_remaining
+           if None in (rate, remaining):
+               return None
+           return int(float(remaining) / rate)
+
+       start, now = start_or_rate, now_or_remaining
        if total is None:
            return None
        if now is None:
            now = time.time()
-       dif = now - start
-       if current == 0 or dif < 0.001:  # One millisecond
-           return None
-       rate = float(current) / dif
-       return int((float(total) - float(current)) / rate)
+       rate = cls.calc_speed(start, now, current)
+       return rate and int((float(total) - float(current)) / rate)

    @staticmethod
    def calc_speed(start, now, bytes):
@@ -165,6 +170,12 @@ def format_speed(speed):
    def format_retries(retries):
        return 'inf' if retries == float('inf') else int(retries)

+   @staticmethod
+   def filesize_or_none(unencoded_filename):
+       if os.path.isfile(unencoded_filename):
+           return os.path.getsize(unencoded_filename)
+       return 0
+
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
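calc_eta now accepts two calling conventions, dispatched on whether the third argument was supplied. A usage sketch, assuming yt_dlp is importable (values are made up):

    import time
    from yt_dlp.downloader.common import FileDownloader

    # legacy four-argument form: (start_time, now, total_bytes, downloaded_bytes)
    start = time.time() - 10                                         # began 10s ago
    eta = FileDownloader.calc_eta(start, time.time(), 100 * 2**20, 10 * 2**20)

    # new two-argument form: (rate_in_bytes_per_sec, remaining_bytes)
    eta = FileDownloader.calc_eta(2**20, 90 * 2**20)                 # -> 90 seconds
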
@@ -225,7 +236,7 @@ def error_callback(err, count, retries, *, fd):
                sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

    def wrapper(self, func, *args, **kwargs):
-       for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
+       for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
            try:
                return func(self, *args, **kwargs)
            except OSError as err:
@@ -285,7 +296,8 @@ def _prepare_multiline_status(self, lines=1):
            self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
        else:
            self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
-       self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
+       self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
+       self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out

    def _finish_multiline_status(self):
        self._multiline.end()
@@ -23,7 +23,6 @@
     encodeArgument,
     encodeFilename,
     find_available_port,
-    handle_youtubedl_headers,
     remove_end,
     sanitized_Request,
     traverse_obj,
@@ -529,10 +528,9 @@ def _call_downloader(self, tmpfilename, info_dict):
        selected_formats = info_dict.get('requested_formats') or [info_dict]
        for i, fmt in enumerate(selected_formats):
            if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
-               headers_dict = handle_youtubedl_headers(fmt['http_headers'])
                # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
                # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
-               args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
+               args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])

            if start_time:
                args += ['-ss', str(start_time)]
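What the -headers construction above produces, for a made-up header dict:

    http_headers = {'User-Agent': 'yt-dlp', 'Cookie': 'sid=abc'}
    args = ['-headers', ''.join(f'{key}: {val}\r\n' for key, val in http_headers.items())]
    # args == ['-headers', 'User-Agent: yt-dlp\r\nCookie: sid=abc\r\n']
    # ffmpeg expects each header terminated by CRLF, including the last one
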
@@ -34,8 +34,8 @@ class FragmentFD(FileDownloader):

     Available options:

-    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
-                        and hlsnative only)
+    fragment_retries:   Number of times to retry a fragment for HTTP error
+                        (DASH and hlsnative only). Default is 0 for API, but 10 for CLI
     skip_unavailable_fragments:
                         Skip unavailable fragments (DASH and hlsnative only)
     keep_fragments:     Keep downloaded fragments on disk after downloading is
@@ -121,6 +121,11 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat
            'request_data': request_data,
            'ctx_id': ctx.get('ctx_id'),
        }
+       frag_resume_len = 0
+       if ctx['dl'].params.get('continuedl', True):
+           frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename))
+       fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len
+
        success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False
@@ -155,9 +160,7 @@ def _append_fragment(self, ctx, frag_content):
        del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
-       if 'live' not in ctx:
-           ctx['live'] = False
-       if not ctx['live']:
+       if not ctx.setdefault('live', False):
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
@@ -173,12 +176,11 @@ def _prepare_frag_download(self, ctx):
        })
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
-       resume_len = 0

        # Establish possible resume length
-       if os.path.isfile(encodeFilename(tmpfilename)):
+       resume_len = self.filesize_or_none(tmpfilename)
+       if resume_len > 0:
            open_mode = 'ab'
-           resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
@@ -187,7 +189,9 @@ def _prepare_frag_download(self, ctx):
        })

        if self.__do_ytdl_file(ctx):
-           if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+           ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+           continuedl = self.params.get('continuedl', True)
+           if continuedl and ytdl_file_exists:
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@@ -201,7 +205,12 @@ def _prepare_frag_download(self, ctx):
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
+
            else:
+               if not continuedl:
+                   if ytdl_file_exists:
+                       self._read_ytdl_file(ctx)
+                   ctx['fragment_index'] = resume_len = 0
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

@@ -274,12 +283,10 @@ def frag_progress_hook(s):
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
-               if not ctx['live']:
-                   state['eta'] = self.calc_eta(
-                       start, time_now, estimated_size - resume_len,
-                       state['downloaded_bytes'] - resume_len)
                ctx['speed'] = state['speed'] = self.calc_speed(
-                   ctx['fragment_started'], time_now, frag_downloaded_bytes)
+                   ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
+               if not ctx['live']:
+                   state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state, info_dict)

@@ -297,7 +304,7 @@ def _finish_frag_download(self, ctx, info_dict):

        to_file = ctx['tmpfilename'] != '-'
        if to_file:
-           downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
+           downloaded_bytes = self.filesize_or_none(ctx['tmpfilename'])
        else:
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']

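The reason for subtracting frag_resume_len above, as a small sketch (helper name is illustrative): bytes that were already on disk before the fragment was retried must not count towards the rate, or a resumed fragment reports an inflated speed:

    def fragment_speed(fragment_started, now, downloaded_bytes, frag_resume_len):
        elapsed = now - fragment_started
        if elapsed < 0.001:     # avoid division by ~zero right after start
            return None
        return (downloaded_bytes - frag_resume_len) / elapsed
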
@@ -45,8 +45,8 @@ class DownloadContext(dict):
        ctx.tmpfilename = self.temp_name(filename)
        ctx.stream = None

-       # Do not include the Accept-Encoding header
-       headers = {'Youtubedl-no-compression': 'True'}
+       # Disable compression
+       headers = {'Accept-Encoding': 'identity'}
        add_headers = info_dict.get('http_headers')
        if add_headers:
            headers.update(add_headers)
@@ -150,7 +150,8 @@ def establish_connection():
                    # Content-Range is either not present or invalid. Assuming remote webserver is
                    # trying to send the whole file, resume is not possible, so wiping the local file
                    # and performing entire redownload
-                   self.report_unable_to_resume()
+                   elif range_start > 0:
+                       self.report_unable_to_resume()
                    ctx.resume_len = 0
                    ctx.open_mode = 'wb'
                    ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
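'Accept-Encoding: identity' is the standard header for requesting the raw, uncompressed body, replacing the internal 'Youtubedl-no-compression' marker. A sketch of why it matters for this downloader (URL is a placeholder): byte ranges and Content-Length then refer to the same stream of bytes that ends up on disk, so resuming stays consistent:

    import urllib.request

    req = urllib.request.Request('https://example.com/file.bin', headers={
        'Accept-Encoding': 'identity',  # ask the server not to gzip the body
        'Range': 'bytes=1024-',         # resume offset lines up with on-disk bytes
    })
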
@@ -1,8 +1,17 @@
+import json
 import threading
+import time

 from . import get_suitable_downloader
 from .common import FileDownloader
-from ..utils import sanitized_Request
+from .external import FFmpegFD
+from ..utils import (
+    DownloadError,
+    str_or_none,
+    sanitized_Request,
+    WebSocketsWrapper,
+    try_get,
+)


 class NiconicoDmcFD(FileDownloader):
@@ -50,3 +59,93 @@ def heartbeat():
             timer[0].cancel()
             download_complete = True
             return success
+
+
+class NiconicoLiveFD(FileDownloader):
+    """ Downloads niconico live without being stopped """
+
+    def real_download(self, filename, info_dict):
+        video_id = info_dict['video_id']
+        ws_url = info_dict['url']
+        ws_extractor = info_dict['ws']
+        ws_origin_host = info_dict['origin']
+        cookies = info_dict.get('cookies')
+        live_quality = info_dict.get('live_quality', 'high')
+        live_latency = info_dict.get('live_latency', 'high')
+        dl = FFmpegFD(self.ydl, self.params or {})
+
+        new_info_dict = info_dict.copy()
+        new_info_dict.update({
+            'protocol': 'm3u8',
+        })
+
+        def communicate_ws(reconnect):
+            if reconnect:
+                ws = WebSocketsWrapper(ws_url, {
+                    'Cookies': str_or_none(cookies) or '',
+                    'Origin': f'https://{ws_origin_host}',
+                    'Accept': '*/*',
+                    'User-Agent': self.params['http_headers']['User-Agent'],
+                })
+                if self.ydl.params.get('verbose', False):
+                    self.to_screen('[debug] Sending startWatching request')
+                ws.send(json.dumps({
+                    'type': 'startWatching',
+                    'data': {
+                        'stream': {
+                            'quality': live_quality,
+                            'protocol': 'hls+fmp4',
+                            'latency': live_latency,
+                            'chasePlay': False
+                        },
+                        'room': {
+                            'protocol': 'webSocket',
+                            'commentable': True
+                        },
+                        'reconnect': True,
+                    }
+                }))
+            else:
+                ws = ws_extractor
+            with ws:
+                while True:
+                    recv = ws.recv()
+                    if not recv:
+                        continue
+                    data = json.loads(recv)
+                    if not data or not isinstance(data, dict):
+                        continue
+                    if data.get('type') == 'ping':
+                        # pong back
+                        ws.send(r'{"type":"pong"}')
+                        ws.send(r'{"type":"keepSeat"}')
+                    elif data.get('type') == 'disconnect':
+                        self.write_debug(data)
+                        return True
+                    elif data.get('type') == 'error':
+                        self.write_debug(data)
+                        message = try_get(data, lambda x: x['body']['code'], str) or recv
+                        return DownloadError(message)
+                    elif self.ydl.params.get('verbose', False):
+                        if len(recv) > 100:
+                            recv = recv[:100] + '...'
+                        self.to_screen('[debug] Server said: %s' % recv)
+
+        def ws_main():
+            reconnect = False
+            while True:
+                try:
+                    ret = communicate_ws(reconnect)
+                    if ret is True:
+                        return
+                except BaseException as e:
+                    self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
+                    time.sleep(10)
+                    continue
+                finally:
+                    reconnect = True
+
+        thread = threading.Thread(target=ws_main, daemon=True)
+        thread.start()
+
+        return dl.download(filename, new_info_dict)
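The message loop above distilled into a standalone sketch (the function is illustrative; ws is any object with a send method): pings are answered to keep the viewing seat alive, 'disconnect' means normal completion, 'error' is fatal:

    import json

    def handle_live_message(ws, raw):
        data = json.loads(raw)
        kind = data.get('type')
        if kind == 'ping':
            ws.send('{"type":"pong"}')
            ws.send('{"type":"keepSeat"}')
        elif kind == 'disconnect':
            return True          # server ended the stream
        elif kind == 'error':
            raise RuntimeError((data.get('body') or {}).get('code') or raw)
        return False
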
@@ -204,7 +204,11 @@
     BFMTVLiveIE,
     BFMTVArticleIE,
 )
-from .bibeltv import BibelTVIE
+from .bibeltv import (
+    BibelTVLiveIE,
+    BibelTVSeriesIE,
+    BibelTVVideoIE,
+)
 from .bigflix import BigflixIE
 from .bigo import BigoIE
 from .bild import BildIE
@@ -247,7 +251,6 @@
 from .bostonglobe import BostonGlobeIE
 from .box import BoxIE
 from .boxcast import BoxCastVideoIE
-from .booyah import BooyahClipsIE
 from .bpb import BpbIE
 from .br import (
     BRIE,
@@ -281,6 +284,10 @@
     CamdemyIE,
     CamdemyFolderIE
 )
+from .camfm import (
+    CamFMEpisodeIE,
+    CamFMShowIE
+)
 from .cammodels import CamModelsIE
 from .camsoda import CamsodaIE
 from .camtasia import CamtasiaEmbedIE
@@ -288,12 +295,6 @@
 from .canalalpha import CanalAlphaIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
-from .canvas import (
-    CanvasIE,
-    CanvasEenIE,
-    VrtNUIE,
-    DagelijkseKostIE,
-)
 from .carambatv import (
     CarambaTVIE,
     CarambaTVPageIE,
@@ -310,14 +311,14 @@
     CBSIE,
     ParamountPressExpressIE,
 )
-from .cbslocal import (
-    CBSLocalIE,
-    CBSLocalArticleIE,
-)
 from .cbsinteractive import CBSInteractiveIE
 from .cbsnews import (
     CBSNewsEmbedIE,
     CBSNewsIE,
+    CBSLocalIE,
+    CBSLocalArticleIE,
+    CBSLocalLiveIE,
+    CBSNewsLiveIE,
     CBSNewsLiveVideoIE,
 )
 from .cbssports import (
@@ -404,9 +405,12 @@
     CrowdBunkerIE,
     CrowdBunkerChannelIE,
 )
+from .crtvg import CrtvgIE
 from .crunchyroll import (
     CrunchyrollBetaIE,
     CrunchyrollBetaShowIE,
+    CrunchyrollMusicIE,
+    CrunchyrollArtistIE,
 )
 from .cspan import CSpanIE, CSpanCongressIE
 from .ctsnews import CtsNewsIE
@@ -423,6 +427,10 @@
     CybraryIE,
     CybraryCourseIE
 )
+from .dacast import (
+    DacastVODIE,
+    DacastPlaylistIE,
+)
 from .daftsex import DaftsexIE
 from .dailymail import DailyMailIE
 from .dailymotion import (
@@ -536,6 +544,7 @@
 from .eighttracks import EightTracksIE
 from .einthusan import EinthusanIE
 from .eitb import EitbIE
+from .elevensports import ElevenSportsIE
 from .ellentube import (
     EllenTubeIE,
     EllenTubeVideoIE,
@@ -784,6 +793,7 @@
     IchinanaLiveIE,
     IchinanaLiveClipIE,
 )
+from .idolplus import IdolPlusIE
 from .ign import (
     IGNIE,
     IGNVideoIE,
@@ -868,6 +878,7 @@
 from .jeuxvideo import JeuxVideoIE
 from .jove import JoveIE
 from .joj import JojIE
+from .jstream import JStreamIE
 from .jwplatform import JWPlatformIE
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
@@ -877,7 +888,6 @@
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
 from .kelbyone import KelbyOneIE
-from .ketnet import KetnetIE
 from .khanacademy import (
     KhanAcademyIE,
     KhanAcademyUnitIE,
@@ -1147,6 +1157,7 @@
 )
 from .myvideoge import MyVideoGeIE
 from .myvidster import MyVidsterIE
+from .mzaalo import MzaaloIE
 from .n1 import (
     N1InfoAssetIE,
     N1InfoIIE,
@@ -1195,6 +1206,7 @@
     NebulaSubscriptionsIE,
     NebulaChannelIE,
 )
+from .nekohacker import NekoHackerIE
 from .nerdcubed import NerdCubedFeedIE
 from .netzkino import NetzkinoIE
 from .neteasemusic import (
@@ -1264,6 +1276,7 @@
     NicovideoSearchIE,
     NicovideoSearchURLIE,
     NicovideoTagURLIE,
+    NiconicoLiveIE,
 )
 from .ninecninemedia import (
     NineCNineMediaIE,
@@ -1373,6 +1386,7 @@
     ORFIPTVIE,
 )
 from .outsidetv import OutsideTVIE
+from .owncloud import OwnCloudIE
 from .packtpub import (
     PacktPubIE,
     PacktPubCourseIE,
@@ -1474,7 +1488,6 @@
     PolskieRadioPlayerIE,
     PolskieRadioPodcastIE,
     PolskieRadioPodcastListIE,
-    PolskieRadioRadioKierowcowIE,
 )
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
@@ -1544,6 +1557,8 @@
     RadLiveSeasonIE,
 )
 from .rai import (
+    RaiIE,
+    RaiCulturaIE,
     RaiPlayIE,
     RaiPlayLiveIE,
     RaiPlayPlaylistIE,
@@ -1552,7 +1567,6 @@
     RaiPlaySoundPlaylistIE,
     RaiNewsIE,
     RaiSudtirolIE,
-    RaiIE,
 )
 from .raywenderlich import (
     RayWenderlichIE,
@@ -1574,6 +1588,7 @@
     RCTIPlusTVIE,
 )
 from .rds import RDSIE
+from .recurbate import RecurbateIE
 from .redbee import ParliamentLiveUKIE, RTBFIE
 from .redbulltv import (
     RedBullTVIE,
@@ -2080,7 +2095,6 @@
 )
 from .tvplay import (
     TVPlayIE,
-    ViafreeIE,
     TVPlayHomeIE,
 )
 from .tvplayer import TVPlayerIE
@@ -2264,7 +2278,12 @@
     VoxMediaVolumeIE,
     VoxMediaIE,
 )
-from .vrt import VRTIE
+from .vrt import (
+    VRTIE,
+    VrtNUIE,
+    KetnetIE,
+    DagelijkseKostIE,
+)
 from .vrak import VrakIE
 from .vrv import (
     VRVIE,
@@ -2315,7 +2334,16 @@
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
+from .weverse import (
+    WeverseIE,
+    WeverseMediaIE,
+    WeverseMomentIE,
+    WeverseLiveTabIE,
+    WeverseMediaTabIE,
+    WeverseLiveIE,
+)
 from .wevidi import WeVidiIE
+from .weyyak import WeyyakIE
 from .whyp import WhypIE
 from .wikimedia import WikimediaIE
 from .willow import WillowIE
@@ -2344,6 +2372,12 @@
     WSJArticleIE,
 )
 from .wwe import WWEIE
+from .wykop import (
+    WykopDigIE,
+    WykopDigCommentIE,
+    WykopPostIE,
+    WykopPostCommentIE,
+)
 from .xanimu import XanimuIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
@@ -2463,6 +2497,7 @@
     ZingMp3WeekChartIE,
     ZingMp3ChartMusicVideoIE,
     ZingMp3UserIE,
+    ZingMp3HubIE,
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
@@ -3,6 +3,8 @@
     ExtractorError,
     GeoRestrictedError,
     int_or_none,
+    remove_start,
+    traverse_obj,
     update_url_query,
     urlencode_postdata,
 )
@@ -72,7 +74,14 @@ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
         requestor_id, brand = self._DOMAIN_MAP[domain]
         result = self._download_json(
             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
-            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+            filter_value, query={'filter[%s]' % filter_key: filter_value})
+        result = traverse_obj(
+            result, ('results',
+                     lambda k, v: k == 0 and v[filter_key] == filter_value),
+            get_all=False)
+        if not result:
+            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+                                 video_id=remove_start(filter_value, '/'))
         title = result['title']
         video_id = result['id']
         media_url = result['publicUrl']
@@ -123,7 +132,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
-        'skip': 'This video is only available for users of participating TV providers.',
+        'skip': 'Geo-restricted - This content is not available in your location.'
     }, {
         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
         'info_dict': {
@@ -140,6 +149,7 @@ class AENetworksIE(AENetworksBaseIE):
             'skip_download': True,
         },
         'add_ie': ['ThePlatform'],
+        'skip': 'This video is only available for users of participating TV providers.',
     }, {
         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
         'only_matching': True
@@ -303,6 +313,7 @@ def _real_extract(self, url):
 class HistoryPlayerIE(AENetworksBaseIE):
     IE_NAME = 'history:player'
     _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
+    _TESTS = []
 
     def _real_extract(self, url):
         domain, video_id = self._match_valid_url(url).groups()
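
The new lookup filters the feed for an exact match instead of trusting `results[0]`. A minimal standalone sketch of the same `traverse_obj` branching-filter pattern; the feed data below is invented for illustration:

from yt_dlp.utils import traverse_obj

feed = {'results': [{'canonical': '/shows/example', 'id': '123'}]}
# A callable path element receives (key, value) for each list item;
# get_all=False returns the first match rather than a list of matches
match = traverse_obj(
    feed, ('results', lambda k, v: k == 0 and v['canonical'] == '/shows/example'),
    get_all=False)
assert match['id'] == '123'
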
@@ -336,7 +336,7 @@ def _get_anvato_videos(self, access_key, video_id, token):
             elif media_format == 'm3u8-variant' or ext == 'm3u8':
                 # For some videos the initial m3u8 URL returns JSON instead
                 manifest_json = self._download_json(
-                    video_url, video_id, note='Downloading manifest JSON', errnote=False)
+                    video_url, video_id, note='Downloading manifest JSON', fatal=False)
                 if manifest_json:
                     video_url = manifest_json.get('master_m3u8')
                     if not video_url:
@@ -392,14 +392,6 @@ def _extract_from_webpage(cls, url, webpage):
             url = smuggle_url(url, {'token': anvplayer_data['token']})
             yield cls.url_result(url, AnvatoIE, video_id)
 
-    def _extract_anvato_videos(self, webpage, video_id):
-        anvplayer_data = self._parse_json(
-            self._html_search_regex(
-                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
-            video_id)
-        return self._get_anvato_videos(
-            anvplayer_data['accessKey'], anvplayer_data['video'], 'default')  # cbslocal token = 'default'
-
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
         self._initialize_geo_bypass({

@@ -13,6 +13,7 @@
     try_get,
     unified_strdate,
     unified_timestamp,
+    update_url,
     update_url_query,
     url_or_none,
     xpath_text,
@@ -408,6 +409,23 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                     (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
 
     _TESTS = [{
+        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
+        'md5': '3fd5fead7a370a819341129c8d713136',
+        'info_dict': {
+            'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
+            'id': '12172961',
+            'title': 'Wolfsland - Die traurigen Schwestern',
+            'description': r're:^Als der Polizeiobermeister Raaben',
+            'duration': 5241,
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
+            'timestamp': 1670710500,
+            'upload_date': '20221210',
+            'ext': 'mp4',
+            'age_limit': 12,
+            'episode': 'Wolfsland - Die traurigen Schwestern',
+            'series': 'Filme im MDR'
+        },
+    }, {
         'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
         'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
         'info_dict': {
@@ -424,7 +442,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
         'skip': 'Error',
     }, {
         'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
-        'md5': 'f1837e563323b8a642a8ddeff0131f51',
+        'md5': '1e73ded21cb79bac065117e80c81dc88',
         'info_dict': {
             'id': '10049223',
             'ext': 'mp4',
@@ -432,13 +450,11 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
             'timestamp': 1636398000,
             'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
             'upload_date': '20211108',
-        },
-    }, {
-        'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
-        'playlist_count': 6,
-        'info_dict': {
-            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
-            'title': 'beforeigners/beforeigners/staffel-1',
+            'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
+            'duration': 915,
+            'episode': 'tagesschau, 20:00 Uhr',
+            'series': 'tagesschau',
+            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
         },
     }, {
         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@@ -602,6 +618,9 @@ def _real_extract(self, url):
                 show {
                     title
                 }
+                image {
+                    src
+                }
                 synopsis
                 title
                 tracking {
@@ -640,6 +659,15 @@ def _real_extract(self, url):
             'description': description,
             'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
             'series': try_get(player_page, lambda x: x['show']['title']),
+            'thumbnail': (media_collection.get('_previewImage')
+                          or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
+                          or self.get_thumbnail_from_html(display_id, url)),
         })
         info.update(self._ARD_extract_episode_info(info['title']))
         return info
 
+    def get_thumbnail_from_html(self, display_id, url):
+        webpage = self._download_webpage(url, display_id, fatal=False) or ''
+        return (
+            self._og_search_thumbnail(webpage, default=None)
+            or self._html_search_meta('thumbnailUrl', webpage, default=None))
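
`update_url` with `query=None, fragment=None` strips those components from the image URL before it is used as a thumbnail. A small illustration of that middle step of the fallback chain; the sample URL is invented:

from yt_dlp.utils import update_url

src = 'https://api.ardmediathek.de/image-service/images/urn:ard:image:example?w=960#crop'
# Passing None for a component removes it from the rebuilt URL
assert update_url(src, query=None, fragment=None) == \
    'https://api.ardmediathek.de/image-service/images/urn:ard:image:example'
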
@@ -1,27 +1,197 @@
+from functools import partial
+
 from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    determine_ext,
+    format_field,
+    int_or_none,
+    js_to_json,
+    orderedSet,
+    parse_iso8601,
+    traverse_obj,
+    url_or_none,
+)
 
 
-class BibelTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
-        'md5': '252f908192d611de038b8504b08bf97f',
-        'info_dict': {
-            'id': 'ref:329703',
-            'ext': 'mp4',
-            'title': 'Sprachkurs in Malaiisch',
-            'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
-            'timestamp': 1608316701,
-            'uploader_id': '5840105145001',
-            'upload_date': '20201218',
-        }
-    }, {
-        'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
-        'only_matching': True,
-    }]
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
+class BibelTVBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['AT', 'CH', 'DE']
+    _GEO_BYPASS = False
+
+    API_URL = 'https://www.bibeltv.de/mediathek/api'
+    AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'
+
+    def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
+        formats = []
+        subtitles = {}
+        for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
+            media_ext = determine_ext(media_url)
+            if media_ext == 'm3u8':
+                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+                    media_url, crn_id, live=is_live)
+                formats.extend(m3u8_formats)
+                subtitles.update(m3u8_subs)
+            elif media_ext == 'mpd':
+                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
+                formats.extend(mpd_formats)
+                subtitles.update(mpd_subs)
+            elif media_ext == 'mp4':
+                formats.append({'url': media_url})
+            else:
+                self.report_warning(f'Unknown format {media_ext!r}')
+
+        return formats, subtitles
+
+    @staticmethod
+    def _extract_base_info(data):
+        return {
+            'id': data['crn'],
+            **traverse_obj(data, {
+                'title': 'title',
+                'description': 'description',
+                'duration': ('duration', {partial(int_or_none, scale=1000)}),
+                'timestamp': ('schedulingStart', {parse_iso8601}),
+                'season_number': 'seasonNumber',
+                'episode_number': 'episodeNumber',
+                'view_count': 'viewCount',
+                'like_count': 'likeCount',
+            }),
+            'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
+                'url': ('url', {url_or_none}),
+            }))),
+        }
+
+    def _extract_url_info(self, data):
+        return {
+            '_type': 'url',
+            'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
+            **self._extract_base_info(data),
+        }
+
+    def _extract_video_info(self, data):
+        crn_id = data['crn']
+
+        if data.get('drm'):
+            self.report_drm(crn_id)
+
+        json_data = self._download_json(
+            format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
+            headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
+            errnote='No formats available') or {}
+
+        formats, subtitles = self._extract_formats_and_subtitles(
+            traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)
+
+        return {
+            '_type': 'video',
+            **self._extract_base_info(data),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class BibelTVVideoIE(BibelTVBaseIE):
+    IE_DESC = 'BibelTV single video'
+    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
+    IE_NAME = 'bibeltv:video'
+
+    _TESTS = [{
+        'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
+        'md5': 'ec1c07efe54353780512e8a4103b612e',
+        'info_dict': {
+            'id': '344436',
+            'ext': 'mp4',
+            'title': 'Alte Wege',
+            'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
+            'timestamp': 1677877071,
+            'duration': 150.0,
+            'upload_date': '20230303',
+            'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
+            'episode': 'Episode 1',
+            'episode_number': 1,
+            'view_count': int,
+            'like_count': int,
+        },
+        'params': {
+            'format': '6',
+        },
+    }]
 
     def _real_extract(self, url):
         crn_id = self._match_id(url)
-        return self.url_result(
-            self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
+        video_data = traverse_obj(
+            self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id),
+            ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
+        if not video_data:
+            raise ExtractorError('Missing video data.')
+
+        return self._extract_video_info(video_data)
+
+
+class BibelTVSeriesIE(BibelTVBaseIE):
+    IE_DESC = 'BibelTV series playlist'
+    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
+    IE_NAME = 'bibeltv:series'
+
+    _TESTS = [{
+        'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
+        'playlist_mincount': 400,
+        'info_dict': {
+            'id': '333485',
+            'title': 'Ein Wunder für jeden Tag',
+            'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        crn_id = self._match_id(url)
+        webpage = self._download_webpage(url, crn_id)
+        nextjs_data = self._search_nextjs_data(webpage, crn_id)
+        series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict}))
+        if not series_data:
+            raise ExtractorError('Missing series data.')
+
+        return self.playlist_result(
+            traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})),
+            crn_id, series_data.get('title'), clean_html(series_data.get('description')))
+
+
+class BibelTVLiveIE(BibelTVBaseIE):
+    IE_DESC = 'BibelTV live program'
+    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
+    IE_NAME = 'bibeltv:live'
+
+    _TESTS = [{
+        'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
+        'info_dict': {
+            'id': 'bibeltv',
+            'ext': 'mp4',
+            'title': 're:Bibel TV',
+            'live_status': 'is_live',
+            'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.bibeltv.de/livestreams/impuls/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        stream_id = self._match_id(url)
+        webpage = self._download_webpage(url, stream_id)
+        stream_data = self._search_json(
+            r'\\"video\\":', webpage, 'bibeltvData', stream_id,
+            transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"')))
+
+        formats, subtitles = self._extract_formats_and_subtitles(
+            traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True)
+
+        return {
+            'id': stream_id,
+            'title': stream_data.get('title'),
+            'thumbnail': stream_data.get('poster'),
+            'is_live': True,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
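
`_extract_base_info` leans on `traverse_obj`'s mapping form, where a `{callable}` set transforms the value it is traversed into. A standalone sketch with invented input data:

from functools import partial

from yt_dlp.utils import int_or_none, parse_iso8601, traverse_obj

data = {'crn': '344436', 'title': 'Alte Wege', 'duration': 150000,
        'schedulingStart': '2023-03-03T21:57:51+01:00'}
info = traverse_obj(data, {
    'title': 'title',
    # {callable} applies the function to the traversed value
    'duration': ('duration', {partial(int_or_none, scale=1000)}),  # -> 150
    'timestamp': ('schedulingStart', {parse_iso8601}),             # -> Unix timestamp
})
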
@@ -1,7 +1,9 @@
 import base64
 import functools
+import hashlib
 import itertools
 import math
+import time
 import urllib.error
 import urllib.parse
@@ -26,6 +28,7 @@
     srt_subtitles_timecode,
     str_or_none,
     traverse_obj,
+    try_call,
     unified_timestamp,
     unsmuggle_url,
     url_or_none,
@@ -514,19 +517,63 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
             'id': '3985676',
         },
         'playlist_mincount': 178,
+    }, {
+        'url': 'https://space.bilibili.com/313580179/video',
+        'info_dict': {
+            'id': '313580179',
+        },
+        'playlist_mincount': 92,
     }]
 
+    def _extract_signature(self, playlist_id):
+        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
+
+        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
+        img_key = traverse_obj(
+            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
+        sub_key = traverse_obj(
+            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
+
+        session_key = img_key + sub_key
+
+        signature_values = []
+        for position in (
+            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
+            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
+            57, 62, 11, 36, 20, 34, 44, 52
+        ):
+            char_at_position = try_call(lambda: session_key[position])
+            if char_at_position:
+                signature_values.append(char_at_position)
+
+        return ''.join(signature_values)[:32]
+
     def _real_extract(self, url):
         playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
         if not is_video_url:
             self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                            'To download audios, add a "/audio" to the URL')
 
+        signature = self._extract_signature(playlist_id)
+
         def fetch_page(page_idx):
+            query = {
+                'keyword': '',
+                'mid': playlist_id,
+                'order': 'pubdate',
+                'order_avoided': 'true',
+                'platform': 'web',
+                'pn': page_idx + 1,
+                'ps': 30,
+                'tid': 0,
+                'web_location': 1550101,
+                'wts': int(time.time()),
+            }
+            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
+
             try:
-                response = self._download_json('https://api.bilibili.com/x/space/arc/search',
-                                               playlist_id, note=f'Downloading page {page_idx}',
-                                               query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
+                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
+                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
             except ExtractorError as e:
                 if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
                     raise ExtractorError(
@@ -556,9 +603,9 @@ def get_entries(page_data):
 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
     _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
     _TESTS = [{
-        'url': 'https://space.bilibili.com/3985676/audio',
+        'url': 'https://space.bilibili.com/313580179/audio',
         'info_dict': {
-            'id': '3985676',
+            'id': '313580179',
         },
         'playlist_mincount': 1,
     }]
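
The signing helper mirrors bilibili's WBI scheme: a fixed permutation of the concatenated img/sub keys yields a 32-character mixin key, and `w_rid` is the MD5 of the URL-encoded query plus that key. A pure-stdlib sketch of the same computation; the default keys below are the hard-coded fallbacks from the hunk above, and real keys come from the `nav` endpoint. Note the query is serialized in sorted key order here, where the extractor relies on its dict already being alphabetical:

import hashlib
import time
import urllib.parse

MIXIN_KEY_ENC_TAB = [
    46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
    33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61,
    26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36,
    20, 34, 44, 52]


def sign_query(query, img_key='34478ba821254d9d93542680e3b86100',
               sub_key='7e16a90d190a4355a78fd00b32a38de6'):
    session_key = img_key + sub_key
    # Shuffle the 64 key characters and keep the first 32 as the mixin key
    mixin_key = ''.join(session_key[i] for i in MIXIN_KEY_ENC_TAB)[:32]
    signed = dict(query, wts=int(time.time()))
    payload = urllib.parse.urlencode(sorted(signed.items()))
    signed['w_rid'] = hashlib.md5(f'{payload}{mixin_key}'.encode()).hexdigest()
    return signed
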
@@ -1,86 +0,0 @@
-from .common import InfoExtractor
-from ..utils import int_or_none, str_or_none, traverse_obj
-
-
-class BooyahBaseIE(InfoExtractor):
-    _BOOYAH_SESSION_KEY = None
-
-    def _real_initialize(self):
-        BooyahBaseIE._BOOYAH_SESSION_KEY = self._request_webpage(
-            'https://booyah.live/api/v3/auths/sessions', None, data=b'').getheader('booyah-session-key')
-
-    def _get_comments(self, video_id):
-        comment_json = self._download_json(
-            f'https://booyah.live/api/v3/playbacks/{video_id}/comments/tops', video_id,
-            headers={'Booyah-Session-Key': self._BOOYAH_SESSION_KEY}, fatal=False) or {}
-
-        return [{
-            'id': comment.get('comment_id'),
-            'author': comment.get('from_nickname'),
-            'author_id': comment.get('from_uid'),
-            'author_thumbnail': comment.get('from_thumbnail'),
-            'text': comment.get('content'),
-            'timestamp': comment.get('create_time'),
-            'like_count': comment.get('like_cnt'),
-        } for comment in comment_json.get('comment_list') or ()]
-
-
-class BooyahClipsIE(BooyahBaseIE):
-    _VALID_URL = r'https?://booyah.live/clips/(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'https://booyah.live/clips/13887261322952306617',
-        'info_dict': {
-            'id': '13887261322952306617',
-            'ext': 'mp4',
-            'view_count': int,
-            'duration': 30,
-            'channel_id': 90565760,
-            'like_count': int,
-            'title': 'Cayendo con estilo 😎',
-            'uploader': '♡LɪꜱGΛMER',
-            'comment_count': int,
-            'uploader_id': '90565760',
-            'thumbnail': 'https://resmambet-a.akamaihd.net/mambet-storage/Clip/90565760/90565760-27204374-fba0-409d-9d7b-63a48b5c0e75.jpg',
-            'upload_date': '20220617',
-            'timestamp': 1655490556,
-            'modified_timestamp': 1655490556,
-            'modified_date': '20220617',
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        json_data = self._download_json(
-            f'https://booyah.live/api/v3/playbacks/{video_id}', video_id,
-            headers={'Booyah-Session-key': self._BOOYAH_SESSION_KEY})
-
-        formats = []
-        for video_data in json_data['playback']['endpoint_list']:
-            formats.extend(({
-                'url': video_data.get('stream_url'),
-                'ext': 'mp4',
-                'height': video_data.get('resolution'),
-            }, {
-                'url': video_data.get('download_url'),
-                'ext': 'mp4',
-                'format_note': 'Watermarked',
-                'height': video_data.get('resolution'),
-                'preference': -10,
-            }))
-
-        return {
-            'id': video_id,
-            'title': traverse_obj(json_data, ('playback', 'name')),
-            'thumbnail': traverse_obj(json_data, ('playback', 'thumbnail_url')),
-            'formats': formats,
-            'view_count': traverse_obj(json_data, ('playback', 'views')),
-            'like_count': traverse_obj(json_data, ('playback', 'likes')),
-            'duration': traverse_obj(json_data, ('playback', 'duration')),
-            'comment_count': traverse_obj(json_data, ('playback', 'comment_cnt')),
-            'channel_id': traverse_obj(json_data, ('playback', 'channel_id')),
-            'uploader': traverse_obj(json_data, ('user', 'nickname')),
-            'uploader_id': str_or_none(traverse_obj(json_data, ('user', 'uid'))),
-            'modified_timestamp': int_or_none(traverse_obj(json_data, ('playback', 'update_time_ms')), 1000),
-            'timestamp': int_or_none(traverse_obj(json_data, ('playback', 'create_time_ms')), 1000),
-            '__post_extractor': self.extract_comments(video_id, self._get_comments(video_id)),
-        }

@@ -1,5 +1,6 @@
 from .adobepass import AdobePassIE
 from ..utils import (
+    HEADRequest,
     extract_attributes,
     float_or_none,
     get_element_html_by_class,
@@ -153,8 +154,11 @@ def _real_extract(self, url):
         if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
             chapters = None
 
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            update_url_query(f'{tp_url}/stream.m3u8', query), video_id, 'mp4', m3u8_id='hls')
+        m3u8_url = self._request_webpage(HEADRequest(
+            update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').geturl()
+        if 'mpeg_cenc' in m3u8_url:
+            self.report_drm(video_id)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
 
         return {
             'id': video_id,

85
yt_dlp/extractor/camfm.py
Normal file
@@ -0,0 +1,85 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_class,
+    get_elements_by_class,
+    join_nonempty,
+    traverse_obj,
+    unified_timestamp,
+    urljoin,
+)
+
+
+class CamFMShowIE(InfoExtractor):
+    _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/shows/(?P<id>[^/]+)'
+    _TESTS = [{
+        'playlist_mincount': 5,
+        'url': 'https://camfm.co.uk/shows/soul-mining/',
+        'info_dict': {
+            'id': 'soul-mining',
+            'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a',
+            'title': 'Soul Mining',
+            'description': 'Telling the stories of jazz, funk and soul from all corners of the world.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+        page = self._download_webpage(url, show_id)
+
+        return {
+            '_type': 'playlist',
+            'id': show_id,
+            'entries': [self.url_result(urljoin('https://camfm.co.uk', i), CamFMEpisodeIE)
+                        for i in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", page)],
+            'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
+                r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)),
+            'title': self._html_search_regex('<h1>([^<]+)</h1>', page, 'title', fatal=False),
+            'description': clean_html(get_element_by_class('small-12 medium-8 cell', page))
+        }
+
+
+class CamFMEpisodeIE(InfoExtractor):
+    _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/player/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://camfm.co.uk/player/43336',
+        'skip': 'Episode will expire - don\'t actually know when, but it will go eventually',
+        'info_dict': {
+            'id': '43336',
+            'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023',
+            'ext': 'mp3',
+            'upload_date': '20230516',
+            'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf',
+            'timestamp': 1684263600,
+            'series': 'AITAA: Am I the Agony Aunt?',
+            'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1',
+            'categories': ['Entertainment'],
+        }
+    }]
+
+    def _real_extract(self, url):
+        episode_id = self._match_id(url)
+        page = self._download_webpage(url, episode_id)
+        audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id)
+
+        caption = get_element_by_class('caption', page)
+        series = clean_html(re.sub(r'<span[^<]+<[^<]+>', '', caption))
+
+        card_section = get_element_by_class('card-section', page)
+        date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False)
+
+        return {
+            'id': episode_id,
+            'title': join_nonempty(series, date, delim=' - '),
+            'formats': traverse_obj(audios, (..., 'formats', ...)),
+            'timestamp': unified_timestamp(date),  # XXX: Does not account for UK's daylight savings
+            'series': series,
+            'description': clean_html(re.sub(r'<b>[^<]+</b><br[^>]+/>', '', card_section)),
+            'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
+                r'<div[^>]+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)',
+                page, 'thumbnail', fatal=False)),
+            'categories': get_elements_by_class('label', caption),
+            'was_live': True,
+        }

@@ -1,383 +0,0 @@
-import json
-
-
-from .common import InfoExtractor
-from .gigya import GigyaBaseIE
-from ..compat import compat_HTTPError
-from ..utils import (
-    ExtractorError,
-    clean_html,
-    extract_attributes,
-    float_or_none,
-    get_element_by_class,
-    int_or_none,
-    merge_dicts,
-    str_or_none,
-    strip_or_none,
-    url_or_none,
-    urlencode_postdata
-)
-
-
-class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
-        'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
-        'info_dict': {
-            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
-            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
-            'ext': 'mp4',
-            'title': 'Nachtwacht: De Greystook',
-            'description': 'Nachtwacht: De Greystook',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 1468.02,
-        },
-        'expected_warnings': ['is not a supported codec'],
-    }, {
-        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
-        'only_matching': True,
-    }]
-    _GEO_BYPASS = False
-    _HLS_ENTRY_PROTOCOLS_MAP = {
-        'HLS': 'm3u8_native',
-        'HLS_AES': 'm3u8_native',
-    }
-    _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2'
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        site_id, video_id = mobj.group('site_id'), mobj.group('id')
-
-        data = None
-        if site_id != 'vrtvideo':
-            # Old API endpoint, serves more formats but may fail for some videos
-            data = self._download_json(
-                'https://mediazone.vrt.be/api/v1/%s/assets/%s'
-                % (site_id, video_id), video_id, 'Downloading asset JSON',
-                'Unable to download asset JSON', fatal=False)
-
-        # New API endpoint
-        if not data:
-            vrtnutoken = self._download_json('https://token.vrt.be/refreshtoken',
-                                             video_id, note='refreshtoken: Retrieve vrtnutoken',
-                                             errnote='refreshtoken failed')['vrtnutoken']
-            headers = self.geo_verification_headers()
-            headers.update({'Content-Type': 'application/json; charset=utf-8'})
-            vrtPlayerToken = self._download_json(
-                '%s/tokens' % self._REST_API_BASE, video_id,
-                'Downloading token', headers=headers, data=json.dumps({
-                    'identityToken': vrtnutoken
-                }).encode('utf-8'))['vrtPlayerToken']
-            data = self._download_json(
-                '%s/videos/%s' % (self._REST_API_BASE, video_id),
-                video_id, 'Downloading video JSON', query={
-                    'vrtPlayerToken': vrtPlayerToken,
-                    'client': 'null',
-                }, expected_status=400)
-        if 'title' not in data:
-            code = data.get('code')
-            if code == 'AUTHENTICATION_REQUIRED':
-                self.raise_login_required()
-            elif code == 'INVALID_LOCATION':
-                self.raise_geo_restricted(countries=['BE'])
-            raise ExtractorError(data.get('message') or code, expected=True)
-
-        # Note: The title may be an empty string
-        title = data['title'] or f'{site_id} {video_id}'
-        description = data.get('description')
-
-        formats = []
-        subtitles = {}
-        for target in data['targetUrls']:
-            format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
-            if not format_url or not format_type:
-                continue
-            format_type = format_type.upper()
-            if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
-                fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
-                    m3u8_id=format_type, fatal=False)
-                formats.extend(fmts)
-                subtitles = self._merge_subtitles(subtitles, subs)
-            elif format_type == 'HDS':
-                formats.extend(self._extract_f4m_formats(
-                    format_url, video_id, f4m_id=format_type, fatal=False))
-            elif format_type == 'MPEG_DASH':
-                fmts, subs = self._extract_mpd_formats_and_subtitles(
-                    format_url, video_id, mpd_id=format_type, fatal=False)
-                formats.extend(fmts)
-                subtitles = self._merge_subtitles(subtitles, subs)
-            elif format_type == 'HSS':
-                fmts, subs = self._extract_ism_formats_and_subtitles(
-                    format_url, video_id, ism_id='mss', fatal=False)
-                formats.extend(fmts)
-                subtitles = self._merge_subtitles(subtitles, subs)
-            else:
-                formats.append({
-                    'format_id': format_type,
-                    'url': format_url,
-                })
-
-        subtitle_urls = data.get('subtitleUrls')
-        if isinstance(subtitle_urls, list):
-            for subtitle in subtitle_urls:
-                subtitle_url = subtitle.get('url')
-                if subtitle_url and subtitle.get('type') == 'CLOSED':
-                    subtitles.setdefault('nl', []).append({'url': subtitle_url})
-
-        return {
-            'id': video_id,
-            'display_id': video_id,
-            'title': title,
-            'description': description,
-            'formats': formats,
-            'duration': float_or_none(data.get('duration'), 1000),
-            'thumbnail': data.get('posterImageUrl'),
-            'subtitles': subtitles,
-        }
-
-
-class CanvasEenIE(InfoExtractor):
-    IE_DESC = 'canvas.be and een.be'
-    _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
-        'md5': 'ed66976748d12350b118455979cca293',
-        'info_dict': {
-            'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
-            'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
-            'ext': 'flv',
-            'title': 'De afspraak veilt voor de Warmste Week',
-            'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 49.02,
-        },
-        'expected_warnings': ['is not a supported codec'],
-    }, {
-        # with subtitles
-        'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
-        'info_dict': {
-            'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
-            'display_id': 'pieter-0167',
-            'ext': 'mp4',
-            'title': 'Pieter 0167',
-            'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 2553.08,
-            'subtitles': {
-                'nl': [{
-                    'ext': 'vtt',
-                }],
-            },
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Pagina niet gevonden',
-    }, {
-        'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
-        'info_dict': {
-            'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
-            'display_id': 'emma-pakt-thilly-aan',
-            'ext': 'mp4',
-            'title': 'Emma pakt Thilly aan',
-            'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 118.24,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'expected_warnings': ['is not a supported codec'],
-    }, {
-        'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        site_id, display_id = mobj.group('site_id'), mobj.group('id')
-
-        webpage = self._download_webpage(url, display_id)
-
-        title = strip_or_none(self._search_regex(
-            r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
-            webpage, 'title', default=None) or self._og_search_title(
-            webpage, default=None))
-
-        video_id = self._html_search_regex(
-            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
-            group='id')
-
-        return {
-            '_type': 'url_transparent',
-            'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
-            'ie_key': CanvasIE.ie_key(),
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': self._og_search_description(webpage),
-        }
-
-
-class VrtNUIE(GigyaBaseIE):
-    IE_DESC = 'VrtNU.be'
-    _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        # Available via old API endpoint
-        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
-        'info_dict': {
-            'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
-            'ext': 'mp4',
-            'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
-            'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
-            'duration': 1457.04,
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'series': 'Postbus X',
-            'season': 'Seizoen 1989',
-            'season_number': 1989,
-            'episode': 'De zwarte weduwe',
-            'episode_number': 1,
-            'timestamp': 1595822400,
-            'upload_date': '20200727',
-        },
-        'skip': 'This video is only available for registered users',
-        'expected_warnings': ['is not a supported codec'],
-    }, {
-        # Only available via new API endpoint
-        'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
-        'info_dict': {
-            'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
-            'ext': 'mp4',
-            'title': 'Aflevering 5',
-            'description': 'Wie valt door de mand tijdens een missie?',
-            'duration': 2967.06,
-            'season': 'Season 1',
-            'season_number': 1,
-            'episode_number': 5,
-        },
-        'skip': 'This video is only available for registered users',
-        'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
-    }]
-    _NETRC_MACHINE = 'vrtnu'
-    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
-    _CONTEXT_ID = 'R3595707040'
-
-    def _perform_login(self, username, password):
-        auth_info = self._gigya_login({
-            'APIKey': self._APIKEY,
-            'targetEnv': 'jssdk',
-            'loginID': username,
-            'password': password,
-            'authMode': 'cookie',
-        })
-
-        if auth_info.get('errorDetails'):
-            raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True)
-
-        # Sometimes authentication fails for no good reason, retry
-        login_attempt = 1
-        while login_attempt <= 3:
-            try:
-                self._request_webpage('https://token.vrt.be/vrtnuinitlogin',
-                                      None, note='Requesting XSRF Token', errnote='Could not get XSRF Token',
-                                      query={'provider': 'site', 'destination': 'https://www.vrt.be/vrtnu/'})
-
-                post_data = {
-                    'UID': auth_info['UID'],
-                    'UIDSignature': auth_info['UIDSignature'],
-                    'signatureTimestamp': auth_info['signatureTimestamp'],
-                    '_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
-                }
-
-                self._request_webpage(
-                    'https://login.vrt.be/perform_login',
-                    None, note='Performing login', errnote='perform login failed',
-                    headers={}, query={
-                        'client_id': 'vrtnu-site'
-                    }, data=urlencode_postdata(post_data))
-
-            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
-                    login_attempt += 1
-                    self.report_warning('Authentication failed')
-                    self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
-                else:
-                    raise e
-            else:
-                break
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, display_id)
-
-        attrs = extract_attributes(self._search_regex(
-            r'(<nui-media[^>]+>)', webpage, 'media element'))
-        video_id = attrs['videoid']
-        publication_id = attrs.get('publicationid')
-        if publication_id:
-            video_id = publication_id + '$' + video_id
-
-        page = (self._parse_json(self._search_regex(
-            r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
-            default='{}'), video_id, fatal=False) or {}).get('page') or {}
-
-        info = self._search_json_ld(webpage, display_id, default={})
-        return merge_dicts(info, {
-            '_type': 'url_transparent',
-            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
-            'ie_key': CanvasIE.ie_key(),
-            'id': video_id,
-            'display_id': display_id,
-            'season_number': int_or_none(page.get('episode_season')),
-        })
-
-
-class DagelijkseKostIE(InfoExtractor):
-    IE_DESC = 'dagelijksekost.een.be'
-    _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
-    _TEST = {
-        'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
-        'md5': '30bfffc323009a3e5f689bef6efa2365',
-        'info_dict': {
-            'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
-            'display_id': 'hachis-parmentier-met-witloof',
-            'ext': 'mp4',
-            'title': 'Hachis parmentier met witloof',
-            'description': 'md5:9960478392d87f63567b5b117688cdc5',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 283.02,
-        },
-        'expected_warnings': ['is not a supported codec'],
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        title = strip_or_none(get_element_by_class(
-            'dish-metadata__title', webpage
-        ) or self._html_search_meta(
-            'twitter:title', webpage))
-
-        description = clean_html(get_element_by_class(
-            'dish-description', webpage)
-        ) or self._html_search_meta(
-            ('description', 'twitter:description', 'og:description'),
-            webpage)
-
-        video_id = self._html_search_regex(
-            r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
-            group='id')
-
-        return {
-            '_type': 'url_transparent',
-            'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
-            'ie_key': CanvasIE.ie_key(),
-            'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': description,
-        }

@@ -351,7 +351,9 @@ def _find_secret_formats(self, formats, video_id):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id)
+        video_info = self._download_json(
+            f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
+            video_id, expected_status=426)
 
         email, password = self._get_login_info()
         if email and password:
@@ -426,7 +428,7 @@ def _real_extract(self, url):
         match = self._match_valid_url(url)
         season_id = match.group('id')
         show = match.group('show')
-        show_info = self._download_json(self._API_BASE + show, season_id)
+        show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426)
         season = int(match.group('season'))
 
         season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)
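
`expected_status=426` makes the downloader accept HTTP 426 (Upgrade Required), which this API can return while still sending a usable JSON body, and parse the response as normal; in general form (URL placeholder is illustrative):

# expected_status may be an int, a sequence of ints, or a callable;
# matching responses are parsed instead of raising an HTTPError
data = self._download_json(api_url, video_id, expected_status=426)
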
@@ -1,116 +0,0 @@
-from .anvato import AnvatoIE
-from .sendtonews import SendtoNewsIE
-from ..compat import compat_urlparse
-from ..utils import (
-    parse_iso8601,
-    unified_timestamp,
-)
-
-
-class CBSLocalIE(AnvatoIE):  # XXX: Do not subclass from concrete IE
-    _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
-    _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
-
-    _TESTS = [{
-        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
-        'info_dict': {
-            'id': '3580809',
-            'ext': 'mp4',
-            'title': 'A Very Blue Anniversary',
-            'description': 'CBS2’s Cindy Hsu has more.',
-            'thumbnail': 're:^https?://.*',
-            'timestamp': int,
-            'upload_date': r're:^\d{8}$',
-            'uploader': 'CBS',
-            'subtitles': {
-                'en': 'mincount:5',
-            },
-            'categories': [
-                'Stations\\Spoken Word\\WCBSTV',
-                'Syndication\\AOL',
-                'Syndication\\MSN',
-                'Syndication\\NDN',
-                'Syndication\\Yahoo',
-                'Content\\News',
-                'Content\\News\\Local News',
-            ],
-            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
-    def _real_extract(self, url):
-        mcp_id = self._match_id(url)
-        return self.url_result(
-            'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
-
-
-class CBSLocalArticleIE(AnvatoIE):  # XXX: Do not subclass from concrete IE
-    _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
-
-    _TESTS = [{
-        # Anvato backend
-        'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
-        'md5': 'f0ee3081e3843f575fccef901199b212',
-        'info_dict': {
-            'id': '3401037',
-            'ext': 'mp4',
-            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
-            'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
-            'thumbnail': 're:^https?://.*',
-            'timestamp': 1463440500,
-            'upload_date': '20160516',
-            'uploader': 'CBS',
-            'subtitles': {
-                'en': 'mincount:5',
-            },
-            'categories': [
-                'Stations\\Spoken Word\\KCBSTV',
-                'Syndication\\MSN',
-                'Syndication\\NDN',
-                'Syndication\\AOL',
-                'Syndication\\Yahoo',
-                'Syndication\\Tribune',
-                'Syndication\\Curb.tv',
-                'Content\\News'
-            ],
-            'tags': ['CBS 2 News Evening'],
-        },
-    }, {
-        # SendtoNews embed
-        'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
-        'info_dict': {
-            'id': 'GxfCe0Zo7D-175909-5588',
-        },
-        'playlist_count': 9,
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-
-        sendtonews_url = SendtoNewsIE._extract_url(webpage)
-        if sendtonews_url:
-            return self.url_result(
-                compat_urlparse.urljoin(url, sendtonews_url),
-                ie=SendtoNewsIE.ie_key())
-
-        info_dict = self._extract_anvato_videos(webpage, display_id)
-
-        timestamp = unified_timestamp(self._html_search_regex(
-            r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
-            'released date', default=None)) or parse_iso8601(
-            self._html_search_meta('uploadDate', webpage))
-
-        info_dict.update({
-            'display_id': display_id,
-            'timestamp': timestamp,
-        })
-
-        return info_dict

@@ -1,36 +1,153 @@
+import base64
 import re
+import urllib.error
+import urllib.parse
 import zlib
 
+from .anvato import AnvatoIE
 from .common import InfoExtractor
-from .cbs import CBSIE
-from ..compat import (
-    compat_b64decode,
-    compat_urllib_parse_unquote,
-)
+from .paramountplus import ParamountPlusIE
 from ..utils import (
+    ExtractorError,
+    HEADRequest,
+    UserNotLive,
+    determine_ext,
+    float_or_none,
+    format_field,
+    int_or_none,
+    make_archive_id,
+    mimetype2ext,
     parse_duration,
+    smuggle_url,
+    traverse_obj,
+    url_or_none,
 )
 
 
-class CBSNewsEmbedIE(CBSIE):  # XXX: Do not subclass from concrete IE
+class CBSNewsBaseIE(InfoExtractor):
+    _LOCALES = {
+        'atlanta': None,
+        'baltimore': 'BAL',
+        'boston': 'BOS',
+        'chicago': 'CHI',
+        'colorado': 'DEN',
+        'detroit': 'DET',
+        'losangeles': 'LA',
+        'miami': 'MIA',
+        'minnesota': 'MIN',
+        'newyork': 'NY',
+        'philadelphia': 'PHI',
+        'pittsburgh': 'PIT',
+        'sacramento': 'SAC',
+        'sanfrancisco': 'SF',
+        'texas': 'DAL',
+    }
+    _LOCALE_RE = '|'.join(map(re.escape, _LOCALES))
+    _ANVACK = '5VD6Eyd6djewbCmNwBFnsJj17YAvGRwl'
+
+    def _get_item(self, webpage, display_id):
+        return traverse_obj(self._search_json(
+            r'CBSNEWS\.defaultPayload\s*=', webpage, 'payload', display_id,
+            default={}), ('items', 0, {dict})) or {}
+
+    def _get_video_url(self, item):
+        return traverse_obj(item, 'video', 'video2', expected_type=url_or_none)
+
+    def _extract_playlist(self, webpage, playlist_id):
+        entries = [self.url_result(embed_url, CBSNewsEmbedIE) for embed_url in re.findall(
+            r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage)]
+        if entries:
+            return self.playlist_result(
+                entries, playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage),
+                self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
+
+    def _extract_video(self, item, video_url, video_id):
+        if mimetype2ext(item.get('format'), default=determine_ext(video_url)) == 'mp4':
+            formats = [{'url': video_url, 'ext': 'mp4'}]
+
+        else:
+            manifest = self._download_webpage(video_url, video_id, note='Downloading m3u8 information')
+
+            anvato_id = self._search_regex(r'anvato-(\d+)', manifest, 'Anvato ID', default=None)
+            # Prefer Anvato if available; cbsnews.com m3u8 formats are re-encoded from Anvato source
+            if anvato_id:
+                return self.url_result(
+                    smuggle_url(f'anvato:{self._ANVACK}:{anvato_id}', {'token': 'default'}),
+                    AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
+
+            formats, _ = self._parse_m3u8_formats_and_subtitles(
+                manifest, video_url, 'mp4', m3u8_id='hls', video_id=video_id)
+
+        def get_subtitles(subs_url):
+            return {
+                'en': [{
+                    'url': subs_url,
+                    'ext': 'dfxp',  # TTAF1
+                }],
+            } if url_or_none(subs_url) else None
+
+        episode_meta = traverse_obj(item, {
+            'season_number': ('season', {int_or_none}),
+            'episode_number': ('episode', {int_or_none}),
+        }) if item.get('isFullEpisode') else {}
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            **traverse_obj(item, {
+                'title': (None, ('fulltitle', 'title')),
+                'description': 'dek',
+                'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}),
+                'duration': ('duration', {float_or_none}),
+                'subtitles': ('captions', {get_subtitles}),
+                'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),
+                'is_live': ('type', {lambda x: x == 'live'}),
+            }, get_all=False),
+            **episode_meta,
+        }
+
+
+class CBSNewsEmbedIE(CBSNewsBaseIE):
     IE_NAME = 'cbsnews:embed'
     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
     _TESTS = [{
'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
|
'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': '6ZP4cXvo9FaX3VLH7MF4CgY30JFpY_GA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cops investigate gorilla incident at Cincinnati Zoo',
|
||||||
|
'description': 'md5:fee7441ab8aaeb3c693482394738102b',
|
||||||
|
'duration': 350,
|
||||||
|
'timestamp': 1464719713,
|
||||||
|
'upload_date': '20160531',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item = self._parse_json(zlib.decompress(compat_b64decode(
|
item = traverse_obj(self._parse_json(zlib.decompress(base64.b64decode(
|
||||||
compat_urllib_parse_unquote(self._match_id(url))),
|
urllib.parse.unquote(self._match_id(url))),
|
||||||
-zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
|
-zlib.MAX_WBITS).decode(), None), ('video', 'items', 0, {dict})) or {}
|
||||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
|
||||||
|
video_id = item['mpxRefId']
|
||||||
|
video_url = self._get_video_url(item)
|
||||||
|
if not video_url:
|
||||||
|
# Old embeds redirect user to ParamountPlus but most links are 404
|
||||||
|
pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}'
|
||||||
|
try:
|
||||||
|
self._request_webpage(HEADRequest(pplus_url), video_id)
|
||||||
|
return self.url_result(pplus_url, ParamountPlusIE)
|
||||||
|
except ExtractorError:
|
||||||
|
self.raise_no_formats('This video is no longer available', True, video_id)
|
||||||
|
|
||||||
|
return self._extract_video(item, video_url, video_id)
|
||||||
|
|
||||||
|
|
||||||
class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE
|
class CBSNewsIE(CBSNewsBaseIE):
|
||||||
IE_NAME = 'cbsnews'
|
IE_NAME = 'cbsnews'
|
||||||
IE_DESC = 'CBS News'
|
IE_DESC = 'CBS News'
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
|
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\w-]+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
|
@@ -47,10 +164,7 @@ class CBSNewsIE(CBSIE):  # XXX: Do not subclass from concrete IE
                 'timestamp': 1476046464,
                 'upload_date': '20161009',
             },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
+            'skip': 'This video is no longer available',
         },
         {
             'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
@@ -61,48 +175,234 @@ class CBSNewsIE(CBSIE):  # XXX: Do not subclass from concrete IE
                 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
                 'upload_date': '20140404',
                 'timestamp': 1396650660,
-                'uploader': 'CBSI-NEW',
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'duration': 205,
                 'subtitles': {
                     'en': [{
-                        'ext': 'ttml',
+                        'ext': 'dfxp',
                     }],
                 },
             },
             'params': {
-                # m3u8 download
-                'skip_download': True,
+                'skip_download': 'm3u8',
             },
         },
         {
            # 48 hours
            'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
            'info_dict': {
+                'id': 'maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved',
                'title': 'Cold as Ice',
                'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
            },
            'playlist_mincount': 7,
        },
+        {
+            'url': 'https://www.cbsnews.com/video/032823-cbs-evening-news/',
+            'info_dict': {
+                'id': '_2wuO7hD9LwtyM_TwSnVwnKp6kxlcXgE',
+                'ext': 'mp4',
+                'title': 'CBS Evening News, March 28, 2023',
+                'description': 'md5:db20615aae54adc1d55a1fd69dc75d13',
+                'duration': 1189,
+                'timestamp': 1680042600,
+                'upload_date': '20230328',
+                'season': 'Season 2023',
+                'season_number': 2023,
+                'episode': 'Episode 83',
+                'episode_number': 83,
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+            'params': {
+                'skip_download': 'm3u8',
+            },
+        },
    ]
 
    def _real_extract(self, url):
        display_id = self._match_id(url)
 
        webpage = self._download_webpage(url, display_id)
 
-        entries = []
-        for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage):
-            entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key()))
-        if entries:
-            return self.playlist_result(
-                entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage),
-                playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
+        playlist = self._extract_playlist(webpage, display_id)
+        if playlist:
+            return playlist
 
-        item = self._parse_json(self._html_search_regex(
-            r'CBSNEWS\.defaultPayload\s*=\s*({.+})',
-            webpage, 'video JSON info'), display_id)['items'][0]
-        return self._extract_video_info(item['mpxRefId'], 'cbsnews')
+        item = self._get_item(webpage, display_id)
+        video_id = item.get('mpxRefId') or display_id
+        video_url = self._get_video_url(item)
+        if not video_url:
+            self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
+
+        return self._extract_video(item, video_url, video_id)
+
+
+class CBSLocalBaseIE(CBSNewsBaseIE):
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        item = self._get_item(webpage, display_id)
+        video_id = item.get('mpxRefId') or display_id
+        anvato_id = None
+        video_url = self._get_video_url(item)
+
+        if not video_url:
+            anv_params = self._search_regex(
+                r'<iframe[^>]+\bdata-src="https?://w3\.mp\.lura\.live/player/prod/v3/anvload\.html\?key=([^"]+)"',
+                webpage, 'Anvato URL', default=None)
+
+            if not anv_params:
+                playlist = self._extract_playlist(webpage, display_id)
+                if playlist:
+                    return playlist
+                self.raise_no_formats('No video content was found', expected=True, video_id=video_id)
+
+            anv_data = self._parse_json(base64.urlsafe_b64decode(f'{anv_params}===').decode(), video_id)
+            anvato_id = anv_data['v']
+            return self.url_result(
+                smuggle_url(f'anvato:{anv_data.get("anvack") or self._ANVACK}:{anvato_id}', {
+                    'token': anv_data.get('token') or 'default',
+                }), AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)])
+
+        return self._extract_video(item, video_url, video_id)
+
+
+class CBSLocalIE(CBSLocalBaseIE):
+    _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/(?:live/)?video/(?P<id>[\w-]+)'
+    _TESTS = [{
+        # Anvato video via defaultPayload JSON
+        'url': 'https://www.cbsnews.com/newyork/video/1st-cannabis-dispensary-opens-in-queens/',
+        'info_dict': {
+            'id': '6376747',
+            'ext': 'mp4',
+            'title': '1st cannabis dispensary opens in Queens',
+            'description': 'The dispensary is women-owned and located in Jamaica.',
+            'uploader': 'CBS',
+            'duration': 20,
+            'timestamp': 1680193657,
+            'upload_date': '20230330',
+            'categories': ['Stations\\Spoken Word\\WCBSTV', 'Content\\Google', 'Content\\News', 'Content\\News\\Local News'],
+            'tags': 'count:11',
+            'thumbnail': 're:^https?://.*',
+            '_old_archive_ids': ['cbslocal 6376747'],
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        # cbsnews.com video via defaultPayload JSON
+        'url': 'https://www.cbsnews.com/newyork/live/video/20230330171655-the-city-is-sounding-the-alarm-on-dangerous-social-media-challenges/',
+        'info_dict': {
+            'id': 'sJqfw7YvgSC6ant2zVmzt3y1jYKoL5J3',
+            'ext': 'mp4',
+            'title': 'the city is sounding the alarm on dangerous social media challenges',
+            'description': 'md5:8eccc9b1b73be5138a52e9c4350d2cd6',
+            'thumbnail': 'https://images-cbsn.cbsnews.com/prod/2023/03/30/story_22509622_1680196925.jpg',
+            'duration': 41.0,
+            'timestamp': 1680196615,
+            'upload_date': '20230330',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+
+class CBSLocalArticleIE(CBSLocalBaseIE):
+    _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/news/(?P<id>[\w-]+)'
+    _TESTS = [{
+        # Anvato video via iframe embed
+        'url': 'https://www.cbsnews.com/newyork/news/mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service/',
+        'playlist_count': 2,
+        'info_dict': {
+            'id': 'mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service',
+            'title': 'MTA station agents begin leaving their booths to provide more direct customer service',
+            'description': 'The more than 2,200 agents will provide face-to-face customer service to passengers.',
+        },
+    }, {
+        'url': 'https://www.cbsnews.com/losangeles/news/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis/',
+        'md5': 'f0ee3081e3843f575fccef901199b212',
+        'info_dict': {
+            'id': '3401037',
+            'ext': 'mp4',
+            'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': 1463440500,
+            'upload_date': '20160516',
+        },
+        'skip': 'Video has been removed',
+    }]
+
+
+class CBSNewsLiveBaseIE(CBSNewsBaseIE):
+    def _get_id(self, url):
+        raise NotImplementedError('This method must be implemented by subclasses')
+
+    def _real_extract(self, url):
+        video_id = self._get_id(url)
+        if not video_id:
+            raise ExtractorError('Livestream is not available', expected=True)
+
+        data = traverse_obj(self._download_json(
+            'https://feeds-cbsn.cbsnews.com/2.0/rundown/', video_id, query={
+                'partner': 'cbsnsite',
+                'edition': video_id,
+                'type': 'live',
+            }), ('navigation', 'data', 0, {dict}))
+
+        video_url = traverse_obj(data, (('videoUrlDAI', ('videoUrl', 'base')), {url_or_none}), get_all=False)
+        if not video_url:
+            raise UserNotLive(video_id=video_id)
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'is_live': True,
+            **traverse_obj(data, {
+                'title': 'headline',
+                'description': 'rundown_slug',
+                'thumbnail': ('images', 'thumbnail_url_hd', {url_or_none}),
+            }),
+        }
+
+
+class CBSLocalLiveIE(CBSNewsLiveBaseIE):
+    _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?P<id>{CBSNewsBaseIE._LOCALE_RE})/live/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.cbsnews.com/losangeles/live/',
+        'info_dict': {
+            'id': 'CBSN-LA',
+            'ext': 'mp4',
+            'title': str,
+            'description': r're:KCBS/CBSN_LA.CRISPIN.\w+.RUNDOWN \w+ \w+',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'live_status': 'is_live',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _get_id(self, url):
+        return format_field(self._LOCALES, self._match_id(url), 'CBSN-%s')
+
+
+class CBSNewsLiveIE(CBSNewsLiveBaseIE):
+    IE_NAME = 'cbsnews:live'
+    IE_DESC = 'CBS News Livestream'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/?(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://www.cbsnews.com/live/',
+        'info_dict': {
+            'id': 'CBSN-US',
+            'ext': 'mp4',
+            'title': str,
+            'description': r're:\w+ \w+ CRISPIN RUNDOWN',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'live_status': 'is_live',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _get_id(self, url):
+        return 'CBSN-US'
+
+
 class CBSNewsLiveVideoIE(InfoExtractor):
@@ -111,7 +411,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
 
     # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
         'info_dict': {
             'id': 'clinton-sanders-prepare-to-face-off-in-nh',
@@ -120,7 +420,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
             'duration': 334,
         },
         'skip': 'Video gone',
-    }
+    }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
@@ -131,13 +431,13 @@ def _real_extract(self, url):
             'dvr_slug': display_id,
         })
 
-        formats = self._extract_akamai_formats(video_info['url'], display_id)
-
         return {
             'id': display_id,
             'display_id': display_id,
-            'title': video_info['headline'],
-            'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
-            'duration': parse_duration(video_info.get('segmentDur')),
-            'formats': formats,
+            'formats': self._extract_akamai_formats(video_info['url'], display_id),
+            **traverse_obj(video_info, {
+                'title': 'headline',
+                'thumbnail': ('thumbnail_url_hd', {url_or_none}),
+                'duration': ('segmentDur', {parse_duration}),
+            }),
         }
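Side note on the embed decoding above: the `#`-fragment handled by CBSNewsEmbedIE's `_real_extract` is a percent-encoded, base64-encoded raw-DEFLATE blob. A minimal standalone sketch of the same decode (the helper name is illustrative, not part of the extractor):

    import base64
    import json
    import urllib.parse
    import zlib

    def decode_embed_payload(fragment):  # illustrative helper
        # percent-decode the fragment, base64-decode it, then inflate the raw
        # DEFLATE stream (negative wbits means "no zlib header"), mirroring _real_extract
        raw = base64.b64decode(urllib.parse.unquote(fragment))
        return json.loads(zlib.decompress(raw, -zlib.MAX_WBITS).decode())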
@@ -2,7 +2,7 @@
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist)/(?P<id>[0-9a-z]{6})'
+    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist|movies)/(?P<id>[0-9a-z]{6})'
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
 
     _TESTS = [{
@@ -25,6 +25,9 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
     }, {
         'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb',
         'only_matching': True,
+    }, {
+        'url': 'https://www.cc.com/movies/tkp406/a-cluesterfuenke-christmas',
+        'only_matching': True,
     }]
 
 
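The only functional change here is the extra `movies` path segment in `_VALID_URL`; a quick sketch checking the pattern against the new test URL (assuming plain `re`, outside the extractor framework):

    import re

    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist|movies)/(?P<id>[0-9a-z]{6})'
    m = re.match(_VALID_URL, 'https://www.cc.com/movies/tkp406/a-cluesterfuenke-christmas')
    assert m and m.group('id') == 'tkp406'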
@@ -314,6 +314,11 @@ class InfoExtractor:
                         * "author" - human-readable name of the comment author
                         * "author_id" - user ID of the comment author
                         * "author_thumbnail" - The thumbnail of the comment author
+                        * "author_url" - The url to the comment author's page
+                        * "author_is_verified" - Whether the author is verified
+                                                 on the platform
+                        * "author_is_uploader" - Whether the comment is made by
+                                                 the video uploader
                         * "id" - Comment ID
                         * "html" - Comment as HTML
                         * "text" - Plain text of the comment
@@ -325,8 +330,8 @@
                         * "dislike_count" - Number of negative ratings of the comment
                         * "is_favorited" - Whether the comment is marked as
                                            favorite by the video uploader
-                        * "author_is_uploader" - Whether the comment is made by
-                                                 the video uploader
+                        * "is_pinned" - Whether the comment is pinned to
+                                        the top of the comments
     age_limit:      Age restriction for the video, as an integer (years)
     webpage_url:    The URL to the video webpage, if given to yt-dlp it
                     should allow to get the same result again. (It will be set
@@ -350,6 +355,10 @@ class InfoExtractor:
                        * "start_time" - The start time of the chapter in seconds
                        * "end_time" - The end time of the chapter in seconds
                        * "title" (optional, string)
+    heatmap:        A list of dictionaries, with the following entries:
+                        * "start_time" - The start time of the data point in seconds
+                        * "end_time" - The end time of the data point in seconds
+                        * "value" - The normalized value of the data point (float between 0 and 1)
     playable_in_embed: Whether this video is allowed to play in embedded
                     players on other sites. Can be True (=always allowed),
                     False (=never allowed), None (=unknown), or a string
@@ -3455,7 +3464,7 @@ def _set_cookie(self, domain, name, value, expire_time=None, port=None,
 
     def _get_cookies(self, url):
         """ Return a http.cookies.SimpleCookie with the cookies for the url """
-        return LenientSimpleCookie(self._downloader._calc_cookies(url))
+        return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
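As a worked example of the new `heatmap` field documented above, an extractor would return something shaped like this (the numbers are hypothetical):

    heatmap = [
        {'start_time': 0.0, 'end_time': 5.2, 'value': 0.87},   # heavily replayed span
        {'start_time': 5.2, 'end_time': 10.4, 'value': 0.31},  # "value" is normalized to 0..1
    ]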
34 yt_dlp/extractor/crtvg.py Normal file
@@ -0,0 +1,34 @@
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class CrtvgIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?crtvg\.es/tvg/a-carta/[^/#?]+-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.crtvg.es/tvg/a-carta/os-caimans-do-tea-5839623',
+        'md5': 'c0958d9ff90e4503a75544358758921d',
+        'info_dict': {
+            'id': '5839623',
+            'title': 'Os caimáns do Tea',
+            'ext': 'mp4',
+            'description': 'md5:f71cfba21ae564f0a6f415b31de1f842',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'params': {'skip_download': 'm3u8'}
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_url = self._search_regex(r'var\s+url\s*=\s*["\']([^"\']+)', webpage, 'video url')
+        formats = self._extract_m3u8_formats(video_url + '/playlist.m3u8', video_id, fatal=False)
+        formats.extend(self._extract_mpd_formats(video_url + '/manifest.mpd', video_id, fatal=False))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': remove_end(self._html_search_meta(
+                ['og:title', 'twitter:title'], webpage, 'title', default=None), ' | CRTVG'),
+            'description': self._html_search_meta('description', webpage, 'description', default=None),
+            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, 'thumbnail', default=None),
+        }
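The new extractor derives both manifests from the single stream URL scraped via `var url = "..."`; a sketch of that derivation (the base URL below is hypothetical):

    base_url = 'https://example.crtvg.es/stream/5839623'  # hypothetical; scraped from the page
    hls_url = base_url + '/playlist.m3u8'   # fed to _extract_m3u8_formats (fatal=False)
    dash_url = base_url + '/manifest.mpd'   # fed to _extract_mpd_formats (fatal=False)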
@@ -1,28 +1,37 @@
 import base64
-import urllib.parse
+import urllib.error
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     float_or_none,
     format_field,
+    int_or_none,
     join_nonempty,
+    parse_age_limit,
+    parse_count,
     parse_iso8601,
     qualities,
+    remove_start,
+    time_seconds,
     traverse_obj,
-    try_get,
+    url_or_none,
+    urlencode_postdata,
 )
 
 
 class CrunchyrollBaseIE(InfoExtractor):
-    _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
+    _BASE_URL = 'https://www.crunchyroll.com'
     _API_BASE = 'https://api.crunchyroll.com'
     _NETRC_MACHINE = 'crunchyroll'
-    params = None
+    _AUTH_HEADERS = None
+    _API_ENDPOINT = None
+    _BASIC_AUTH = None
+    _QUERY = {}
 
     @property
     def is_logged_in(self):
-        return self._get_cookies(self._LOGIN_URL).get('etp_rt')
+        return self._get_cookies(self._BASE_URL).get('etp_rt')
 
     def _perform_login(self, username, password):
         if self.is_logged_in:
@@ -35,7 +44,7 @@ def _perform_login(self, username, password):
                 'device_id': 'whatvalueshouldbeforweb',
                 'device_type': 'com.crunchyroll.static',
                 'access_token': 'giKq5eY27ny3cqz',
-                'referer': self._LOGIN_URL
+                'referer': f'{self._BASE_URL}/welcome/login'
             })
         if upsell_response['code'] != 'ok':
             raise ExtractorError('Could not get session id')
@@ -43,149 +52,89 @@ def _perform_login(self, username, password):
 
         login_response = self._download_json(
             f'{self._API_BASE}/login.1.json', None, 'Logging in',
-            data=urllib.parse.urlencode({
+            data=urlencode_postdata({
                 'account': username,
                 'password': password,
                 'session_id': session_id
-            }).encode('ascii'))
+            }))
         if login_response['code'] != 'ok':
             raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
         if not self.is_logged_in:
             raise ExtractorError('Login succeeded but did not set etp_rt cookie')
 
-    def _get_embedded_json(self, webpage, display_id):
-        initial_state = self._parse_json(self._search_regex(
-            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
-        app_config = self._parse_json(self._search_regex(
-            r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
-        return initial_state, app_config
+    def _update_query(self, lang):
+        if lang in CrunchyrollBaseIE._QUERY:
+            return
 
-    def _get_params(self, lang):
-        if not CrunchyrollBaseIE.params:
-            if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
-                grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
-            else:
-                grant_type, key = 'client_id', 'anonClientId'
+        webpage = self._download_webpage(
+            f'{self._BASE_URL}/{lang}', None, note=f'Retrieving main page (lang={lang or None})')
 
-            initial_state, app_config = self._get_embedded_json(self._download_webpage(
-                f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
-            api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')
+        initial_state = self._search_json(r'__INITIAL_STATE__\s*=', webpage, 'initial state', None)
+        CrunchyrollBaseIE._QUERY[lang] = traverse_obj(initial_state, {
+            'locale': ('localization', 'locale'),
+        }) or None
 
-            auth_response = self._download_json(
-                f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
-                headers={
-                    'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
-                }, data=f'grant_type={grant_type}'.encode('ascii'))
-            policy_response = self._download_json(
-                f'{api_domain}/index/v2', None, note='Retrieving signed policy',
-                headers={
-                    'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
-                })
-            cms = policy_response.get('cms_web')
-            bucket = cms['bucket']
-            params = {
-                'Policy': cms['policy'],
-                'Signature': cms['signature'],
-                'Key-Pair-Id': cms['key_pair_id']
-            }
-            locale = traverse_obj(initial_state, ('localization', 'locale'))
-            if locale:
-                params['locale'] = locale
-            CrunchyrollBaseIE.params = (api_domain, bucket, params)
-        return CrunchyrollBaseIE.params
+        if CrunchyrollBaseIE._BASIC_AUTH:
+            return
 
+        app_config = self._search_json(r'__APP_CONFIG__\s*=', webpage, 'app config', None)
+        cx_api_param = app_config['cxApiParams']['accountAuthClientId' if self.is_logged_in else 'anonClientId']
+        self.write_debug(f'Using cxApiParam={cx_api_param}')
+        CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
 
-class CrunchyrollBetaIE(CrunchyrollBaseIE):
-    IE_NAME = 'crunchyroll'
-    _VALID_URL = r'''(?x)
-        https?://(?:beta|www)\.crunchyroll\.com/
-        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
-        watch/(?P<id>\w+)
-        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
-    _TESTS = [{
-        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
-        'info_dict': {
-            'id': 'GY2P1Q98Y',
-            'ext': 'mp4',
-            'duration': 1380.241,
-            'timestamp': 1459632600,
-            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
-            'title': 'World Trigger Episode 73 – To the Future',
-            'upload_date': '20160402',
-            'series': 'World Trigger',
-            'series_id': 'GR757DMKY',
-            'season': 'World Trigger',
-            'season_id': 'GR9P39NJ6',
-            'season_number': 1,
-            'episode': 'To the Future',
-            'episode_number': 73,
-            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
-            'chapters': 'count:2',
-        },
-        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
-    }, {
-        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
-        'info_dict': {
-            'id': 'GYE5WKQGR',
-            'ext': 'mp4',
-            'duration': 366.459,
-            'timestamp': 1476788400,
-            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
-            'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation',
-            'upload_date': '20161018',
-            'series': 'SHELTER',
-            'series_id': 'GYGG09WWY',
-            'season': 'SHELTER',
-            'season_id': 'GR09MGK4R',
-            'season_number': 1,
-            'episode': 'Porter Robinson presents Shelter the Animation',
-            'episode_number': 0,
-            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
-            'chapters': 'count:0',
-        },
-        'params': {'skip_download': True},
-        'skip': 'Video is Premium only',
-    }, {
-        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
-        'only_matching': True,
-    }, {
-        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
-        'only_matching': True,
-    }]
+    def _update_auth(self):
+        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
+            return
 
-    def _real_extract(self, url):
-        lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
-        api_domain, bucket, params = self._get_params(lang)
+        assert CrunchyrollBaseIE._BASIC_AUTH, '_update_query needs to be called at least one time beforehand'
+        grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
+        auth_response = self._download_json(
+            f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
+            headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
 
-        episode_response = self._download_json(
-            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
-            note='Retrieving episode metadata', query=params)
-        if episode_response.get('is_premium_only') and not bucket.endswith('crunchyroll'):
-            if self.is_logged_in:
-                raise ExtractorError('This video is for premium members only', expected=True)
-            else:
-                self.raise_login_required('This video is for premium members only')
+        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
+        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
 
-        stream_response = self._download_json(
-            f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id,
-            note='Retrieving stream info', query=params)
-        get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
+    def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
+        self._update_query(lang)
+        self._update_auth()
 
-        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
-        hardsub_preference = qualities(requested_hardsubs[::-1])
+        if not endpoint.startswith('/'):
+            endpoint = f'/{endpoint}'
 
+        return self._download_json(
+            f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
+            headers=CrunchyrollBaseIE._AUTH_HEADERS, query={**CrunchyrollBaseIE._QUERY[lang], **query})
+
+    def _call_api(self, path, internal_id, lang, note='api', query={}):
+        if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
+            path = f'/content/v2/{self._API_ENDPOINT}/{path}'
+
+        try:
+            result = self._call_base_api(
+                path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
+        except ExtractorError as error:
+            if isinstance(error.cause, urllib.error.HTTPError) and error.cause.code == 404:
+                return None
+            raise
+
+        if not result:
+            raise ExtractorError(f'Unexpected response when downloading {note} JSON')
+        return result
+
+    def _extract_formats(self, stream_response, display_id=None):
         requested_formats = self._configuration_arg('format') or ['adaptive_hls']
 
         available_formats = {}
-        for stream_type, streams in get_streams('streams'):
+        for stream_type, streams in traverse_obj(
+                stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
             if stream_type not in requested_formats:
                 continue
-            for stream in streams.values():
-                if not stream.get('url'):
-                    continue
+            for stream in traverse_obj(streams, lambda _, v: v['url']):
                 hardsub_lang = stream.get('hardsub_locale') or ''
                 format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                 available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
 
+        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
         if '' in available_formats and 'all' not in requested_hardsubs:
             full_format_langs = set(requested_hardsubs)
             self.to_screen(
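For reference, the `_BASIC_AUTH` value built in `_update_query` above is just the client id with an empty password in HTTP Basic form; a standalone sketch (the id value is hypothetical):

    import base64

    cx_api_param = 'hypothetical-client-id'  # anonClientId or accountAuthClientId from __APP_CONFIG__
    basic_auth = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()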
@@ -196,6 +145,8 @@ def _real_extract(self, url):
         else:
             full_format_langs = set(map(str.lower, available_formats))
 
+        audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
+        hardsub_preference = qualities(requested_hardsubs[::-1])
         formats = []
         for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
             if stream_type.endswith('hls'):
@@ -214,63 +165,292 @@ def _real_extract(self, url):
                 continue
             for f in adaptive_formats:
                 if f.get('acodec') != 'none':
-                    f['language'] = stream_response.get('audio_locale')
+                    f['language'] = audio_locale
                 f['quality'] = hardsub_preference(hardsub_lang.lower())
             formats.extend(adaptive_formats)
 
-        chapters = None
+        return formats
+
+    def _extract_subtitles(self, data):
+        subtitles = {}
+
+        for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
+            subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
+
+        return subtitles
+
+
+class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
+    _API_ENDPOINT = 'cms'
+    _CMS_EXPIRY = None
+
+    def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
+        if not CrunchyrollCmsBaseIE._CMS_EXPIRY or CrunchyrollCmsBaseIE._CMS_EXPIRY <= time_seconds():
+            response = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web']
+            CrunchyrollCmsBaseIE._CMS_QUERY = {
+                'Policy': response['policy'],
+                'Signature': response['signature'],
+                'Key-Pair-Id': response['key_pair_id'],
+            }
+            CrunchyrollCmsBaseIE._CMS_BUCKET = response['bucket']
+            CrunchyrollCmsBaseIE._CMS_EXPIRY = parse_iso8601(response['expires']) - 10
+
+        if not path.startswith('/cms/v2'):
+            path = f'/cms/v2{CrunchyrollCmsBaseIE._CMS_BUCKET}/{path}'
+
+        return self._call_base_api(
+            path, internal_id, lang, f'Downloading {note} JSON (signed cms)', query=CrunchyrollCmsBaseIE._CMS_QUERY)
+
+
+class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
+    IE_NAME = 'crunchyroll'
+    _VALID_URL = r'''(?x)
+        https?://(?:beta\.|www\.)?crunchyroll\.com/
+        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
+        watch/(?!concert|musicvideo)(?P<id>\w+)'''
+    _TESTS = [{
+        # Premium only
+        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
+        'info_dict': {
+            'id': 'GY2P1Q98Y',
+            'ext': 'mp4',
+            'duration': 1380.241,
+            'timestamp': 1459632600,
+            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
+            'title': 'World Trigger Episode 73 – To the Future',
+            'upload_date': '20160402',
+            'series': 'World Trigger',
+            'series_id': 'GR757DMKY',
+            'season': 'World Trigger',
+            'season_id': 'GR9P39NJ6',
+            'season_number': 1,
+            'episode': 'To the Future',
+            'episode_number': 73,
+            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+            'chapters': 'count:2',
+            'age_limit': 14,
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
+    }, {
+        # Premium only
+        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
+        'info_dict': {
+            'id': 'GYE5WKQGR',
+            'ext': 'mp4',
+            'duration': 366.459,
+            'timestamp': 1476788400,
+            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
+            'title': 'SHELTER – Porter Robinson presents Shelter the Animation',
+            'upload_date': '20161018',
+            'series': 'SHELTER',
+            'series_id': 'GYGG09WWY',
+            'season': 'SHELTER',
+            'season_id': 'GR09MGK4R',
+            'season_number': 1,
+            'episode': 'Porter Robinson presents Shelter the Animation',
+            'episode_number': 0,
+            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+            'age_limit': 14,
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {'skip_download': True},
+    }, {
+        'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
+        'info_dict': {
+            'id': 'GJWU2VKK3',
+            'ext': 'mp4',
+            'duration': 1420.054,
+            'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
+            'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard',
+            'series': 'The Ice Guy and His Cool Female Colleague',
+            'series_id': 'GW4HM75NP',
+            'season': 'The Ice Guy and His Cool Female Colleague',
+            'season_id': 'GY9PC21VE',
+            'season_number': 1,
+            'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
+            'episode_number': 1,
+            'chapters': 'count:2',
+            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+            'timestamp': 1672839000,
+            'upload_date': '20230104',
+            'age_limit': 14,
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
+        'info_dict': {
+            'id': 'GM8F313NQ',
+            'ext': 'mp4',
+            'title': 'Garakowa -Restore the World-',
+            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
+            'duration': 3996.104,
+            'age_limit': 13,
+            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
+        'info_dict': {
+            'id': 'G62PEZ2E6',
+            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
+            'age_limit': 13,
+            'duration': 65.138,
+            'title': 'Garakowa -Restore the World-',
+        },
+        'playlist_mincount': 5,
+    }, {
+        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
+        'only_matching': True,
+    }, {
+        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
+        'only_matching': True,
+    }]
+    # We want to support lazy playlist filtering and movie listings cannot be inside a playlist
+    _RETURN_TYPE = 'video'
+
+    def _real_extract(self, url):
+        lang, internal_id = self._match_valid_url(url).group('lang', 'id')
+
+        # We need to use unsigned API call to allow ratings query string
+        response = traverse_obj(self._call_api(
+            f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
+        if not response:
+            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
+
+        object_type = response.get('type')
+        if object_type == 'episode':
+            result = self._transform_episode_response(response)
+
+        elif object_type == 'movie':
+            result = self._transform_movie_response(response)
+
+        elif object_type == 'movie_listing':
+            first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
+            if not self._yes_playlist(internal_id, first_movie_id):
+                return self.url_result(f'{self._BASE_URL}/{lang}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)
+
+            def entries():
+                movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
+                for movie_response in traverse_obj(movies, ('data', ...)):
+                    yield self.url_result(
+                        f'{self._BASE_URL}/{lang}watch/{movie_response["id"]}',
+                        CrunchyrollBetaIE, **self._transform_movie_response(movie_response))
+
+            return self.playlist_result(entries(), **self._transform_movie_response(response))
+
+        else:
+            raise ExtractorError(f'Unknown object type {object_type}')
+
+        # There might be multiple audio languages for one object (`<object>_metadata.versions`),
+        # so we need to get the id from `streams_link` instead or we don't know which language to choose
+        streams_link = response.get('streams_link')
+        if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
+            message = f'This {object_type} is for premium members only'
+            if self.is_logged_in:
+                raise ExtractorError(message, expected=True)
+            self.raise_login_required(message)
+
+        # We need to go from unsigned to signed api to avoid getting soft banned
+        stream_response = self._call_cms_api_signed(remove_start(
+            streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
+        result['formats'] = self._extract_formats(stream_response, internal_id)
+        result['subtitles'] = self._extract_subtitles(stream_response)
+
         # if no intro chapter is available, a 403 without usable data is returned
-        intro_chapter = self._download_json(f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
-                                            display_id, fatal=False, errnote=False)
+        intro_chapter = self._download_json(
+            f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
+            internal_id, note='Downloading chapter info', fatal=False, errnote=False)
         if isinstance(intro_chapter, dict):
-            chapters = [{
+            result['chapters'] = [{
                 'title': 'Intro',
                 'start_time': float_or_none(intro_chapter.get('startTime')),
-                'end_time': float_or_none(intro_chapter.get('endTime'))
+                'end_time': float_or_none(intro_chapter.get('endTime')),
             }]
 
+        def calculate_count(item):
+            return parse_count(''.join((item['displayed'], item.get('unit') or '')))
+
+        result.update(traverse_obj(response, ('rating', {
+            'like_count': ('up', {calculate_count}),
+            'dislike_count': ('down', {calculate_count}),
+        })))
+
+        return result
+
+    @staticmethod
+    def _transform_episode_response(data):
+        metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}
         return {
-            'id': internal_id,
-            'title': '%s Episode %s – %s' % (
-                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
-            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
-            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
-            'timestamp': parse_iso8601(episode_response.get('upload_date')),
-            'series': episode_response.get('series_title'),
-            'series_id': episode_response.get('series_id'),
-            'season': episode_response.get('season_title'),
-            'season_id': episode_response.get('season_id'),
-            'season_number': episode_response.get('season_number'),
-            'episode': episode_response.get('title'),
-            'episode_number': episode_response.get('sequence_number'),
-            'formats': formats,
-            'thumbnails': [{
-                'url': thumb.get('source'),
-                'width': thumb.get('width'),
-                'height': thumb.get('height'),
-            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
-            'subtitles': {
-                lang: [{
-                    'url': subtitle_data.get('url'),
-                    'ext': subtitle_data.get('format')
-                }] for lang, subtitle_data in get_streams('subtitles')
-            },
-            'chapters': chapters
+            'id': data['id'],
+            'title': ' \u2013 '.join((
+                ('%s%s' % (
+                    format_field(metadata, 'season_title'),
+                    format_field(metadata, 'episode', ' Episode %s'))),
+                format_field(data, 'title'))),
+            **traverse_obj(data, {
+                'episode': ('title', {str}),
+                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
+                'thumbnails': ('images', 'thumbnail', ..., ..., {
+                    'url': ('source', {url_or_none}),
+                    'width': ('width', {int_or_none}),
+                    'height': ('height', {int_or_none}),
+                }),
+            }),
+            **traverse_obj(metadata, {
+                'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
+                'timestamp': ('upload_date', {parse_iso8601}),
+                'series': ('series_title', {str}),
+                'series_id': ('series_id', {str}),
+                'season': ('season_title', {str}),
+                'season_id': ('season_id', {str}),
+                'season_number': ('season_number', ({int}, {float_or_none})),
+                'episode_number': ('sequence_number', ({int}, {float_or_none})),
+                'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
+                'language': ('audio_locale', {str}),
+            }, get_all=False),
+        }
+
+    @staticmethod
+    def _transform_movie_response(data):
+        metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}
+        return {
+            'id': data['id'],
+            **traverse_obj(data, {
+                'title': ('title', {str}),
+                'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
+                'thumbnails': ('images', 'thumbnail', ..., ..., {
+                    'url': ('source', {url_or_none}),
+                    'width': ('width', {int_or_none}),
+                    'height': ('height', {int_or_none}),
+                }),
+            }),
+            **traverse_obj(metadata, {
+                'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}),
+                'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
+            }),
         }
 
 
-class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
+class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
     IE_NAME = 'crunchyroll:playlist'
     _VALID_URL = r'''(?x)
-        https?://(?:beta|www)\.crunchyroll\.com/
+        https?://(?:beta\.|www\.)?crunchyroll\.com/
         (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
-        series/(?P<id>\w+)
-        (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
+        series/(?P<id>\w+)'''
     _TESTS = [{
         'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
         'info_dict': {
             'id': 'GY19NQ2QR',
             'title': 'Girl Friend BETA',
+            'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
+            # XXX: `thumbnail` does not get set from `thumbnails` in playlist
+            # 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+            'age_limit': 14,
         },
         'playlist_mincount': 10,
     }, {
@ -279,41 +459,163 @@ class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
|
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||||
api_domain, bucket, params = self._get_params(lang)
|
|
||||||
|
|
||||||
series_response = self._download_json(
|
|
||||||
f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id,
|
|
||||||
note='Retrieving series metadata', query=params)
|
|
||||||
|
|
||||||
seasons_response = self._download_json(
|
|
||||||
f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id,
|
|
||||||
note='Retrieving season list', query=params)
|
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for season in seasons_response['items']:
|
seasons_response = self._call_cms_api_signed(f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
|
||||||
episodes_response = self._download_json(
|
for season in traverse_obj(seasons_response, ('items', ..., {dict})):
|
||||||
f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id,
|
episodes_response = self._call_cms_api_signed(
|
||||||
note=f'Retrieving episode list for {season.get("slug_title")}', query=params)
|
f'episodes?season_id={season["id"]}', season["id"], lang, 'episode list')
|
||||||
for episode in episodes_response['items']:
|
for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})):
|
||||||
episode_id = episode['id']
|
yield self.url_result(
|
||||||
episode_display_id = episode['slug_title']
|
f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}',
|
||||||
yield {
|
CrunchyrollBetaIE, **CrunchyrollBetaIE._transform_episode_response(episode_response))
|
||||||
'_type': 'url',
|
|
||||||
'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
|
|
||||||
'ie_key': CrunchyrollBetaIE.ie_key(),
|
|
||||||
'id': episode_id,
|
|
||||||
'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
|
|
||||||
'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')),
|
|
||||||
'duration': float_or_none(episode.get('duration_ms'), 1000),
|
|
||||||
'series': episode.get('series_title'),
|
|
||||||
'series_id': episode.get('series_id'),
|
|
||||||
'season': episode.get('season_title'),
|
|
||||||
'season_id': episode.get('season_id'),
|
|
||||||
'season_number': episode.get('season_number'),
|
|
||||||
'episode': episode.get('title'),
|
|
||||||
'episode_number': episode.get('sequence_number'),
|
|
||||||
'language': episode.get('audio_locale'),
|
|
||||||
}
|
|
||||||
|
|
||||||
return self.playlist_result(entries(), internal_id, series_response.get('title'))
|
return self.playlist_result(
|
||||||
|
entries(), internal_id,
|
||||||
|
**traverse_obj(self._call_api(f'series/{internal_id}', internal_id, lang, 'series'), ('data', 0, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}),
|
||||||
|
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||||
|
'thumbnails': ('images', ..., ..., ..., {
|
||||||
|
'url': ('source', {url_or_none}),
|
||||||
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
})
|
||||||
|
})))
|
||||||
|
|
||||||
|
|
||||||
|
class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
||||||
|
IE_NAME = 'crunchyroll:music'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?crunchyroll\.com/
|
||||||
|
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||||
|
watch/(?P<type>concert|musicvideo)/(?P<id>\w{10})'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
|
||||||
|
'info_dict': {
|
||||||
|
'ext': 'mp4',
|
||||||
|
'id': 'MV88BB7F2C',
|
||||||
|
'display_id': 'crossing-field',
|
||||||
|
'title': 'Crossing Field',
|
||||||
|
'track': 'Crossing Field',
|
||||||
|
'artist': 'LiSA',
|
||||||
|
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||||
|
'genre': ['Anime'],
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
|
||||||
|
'info_dict': {
|
||||||
|
'ext': 'mp4',
|
||||||
|
'id': 'MC2E2AC135',
|
||||||
|
'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
|
||||||
|
'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||||
|
'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
|
||||||
|
'artist': 'LiSA',
|
||||||
|
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||||
|
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
|
||||||
|
'genre': ['J-Pop'],
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_API_ENDPOINT = 'music'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type')
|
||||||
|
path, name = {
|
||||||
|
'concert': ('concerts', 'concert info'),
|
||||||
|
'musicvideo': ('music_videos', 'music video info'),
|
||||||
|
}[object_type]
|
||||||
|
response = traverse_obj(self._call_api(f'{path}/{internal_id}', internal_id, lang, name), ('data', 0, {dict}))
|
||||||
|
if not response:
|
||||||
|
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
|
||||||
|
|
||||||
|
streams_link = response.get('streams_link')
|
||||||
|
if not streams_link and response.get('isPremiumOnly'):
|
||||||
|
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||||
|
if self.is_logged_in:
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
|
self.raise_login_required(message)
|
||||||
|
|
||||||
|
result = self._transform_music_response(response)
|
||||||
|
stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
|
||||||
|
result['formats'] = self._extract_formats(stream_response, internal_id)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _transform_music_response(data):
|
||||||
|
return {
|
||||||
|
'id': data['id'],
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'display_id': 'slug',
|
||||||
|
'title': 'title',
|
||||||
|
'track': 'title',
|
||||||
|
'artist': ('artist', 'name'),
|
||||||
|
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
|
||||||
|
'thumbnails': ('images', ..., ..., {
|
||||||
|
'url': ('source', {url_or_none}),
|
||||||
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
}),
|
||||||
|
'genre': ('genres', ..., 'displayValue'),
|
||||||
|
'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CrunchyrollArtistIE(CrunchyrollBaseIE):
|
||||||
|
IE_NAME = 'crunchyroll:artist'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?crunchyroll\.com/
|
||||||
|
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||||
|
artist/(?P<id>\w{10})'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MA179CB50D',
|
||||||
|
'title': 'LiSA',
|
||||||
|
'genre': ['J-Pop', 'Anime', 'Rock'],
|
||||||
|
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 83,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_API_ENDPOINT = 'music'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
lang, internal_id = self._match_valid_url(url).group('lang', 'id')
|
||||||
|
response = traverse_obj(self._call_api(
|
||||||
|
f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0))
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
|
||||||
|
for internal_id in traverse_obj(response, (attribute, ...)):
|
||||||
|
yield self.url_result(f'{self._BASE_URL}/watch/{path}/{internal_id}', CrunchyrollMusicIE, internal_id)
|
||||||
|
|
||||||
|
return self.playlist_result(entries(), **self._transform_artist_response(response))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _transform_artist_response(data):
|
||||||
|
return {
|
||||||
|
'id': data['id'],
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'title': 'name',
|
||||||
|
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
|
||||||
|
'thumbnails': ('images', ..., ..., {
|
||||||
|
'url': ('source', {url_or_none}),
|
||||||
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
}),
|
||||||
|
'genre': ('genres', ..., 'displayValue'),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
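
(Aside, not part of the commit: the refactor above leans heavily on yt-dlp's traverse_obj helper. A minimal sketch of the pattern, using made-up sample data shaped like the Crunchyroll 'images' payload; tuple elements walk keys, `...` branches over every list item, and a {callable} transforms the value it reaches.)

    from yt_dlp.utils import int_or_none, traverse_obj, url_or_none

    # Hypothetical payload shaped like the API's 'images' object
    data = {'images': {'thumbnail': [[
        {'source': 'https://img.example/a.jpg', 'width': '640', 'height': '360'}]]}}
    thumbnails = traverse_obj(data, ('images', 'thumbnail', ..., ..., {
        'url': ('source', {url_or_none}),
        'width': ('width', {int_or_none}),
        'height': ('height', {int_or_none}),
    }))
    print(thumbnails)  # [{'url': 'https://img.example/a.jpg', 'width': 640, 'height': 360}]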
yt_dlp/extractor/dacast.py (new file, 158 lines)
@@ -0,0 +1,158 @@
+import hashlib
+import re
+import time
+import urllib.error
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    classproperty,
+    float_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class DacastBaseIE(InfoExtractor):
+    _URL_TYPE = None
+
+    @classproperty
+    def _VALID_URL(cls):
+        return fr'https?://iframe\.dacast\.com/{cls._URL_TYPE}/(?P<user_id>[\w-]+)/(?P<id>[\w-]+)'
+
+    @classproperty
+    def _EMBED_REGEX(cls):
+        return [rf'<iframe[^>]+\bsrc=["\'](?P<url>{cls._VALID_URL})']
+
+    _API_INFO_URL = 'https://playback.dacast.com/content/info'
+
+    @classmethod
+    def _get_url_from_id(cls, content_id):
+        user_id, media_id = content_id.split(f'-{cls._URL_TYPE}-')
+        return f'https://iframe.dacast.com/{cls._URL_TYPE}/{user_id}/{media_id}'
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        yield from super()._extract_embed_urls(url, webpage)
+        for content_id in re.findall(
+                rf'<script[^>]+\bsrc=["\']https://player\.dacast\.com/js/player\.js\?contentId=([\w-]+-{cls._URL_TYPE}-[\w-]+)["\']', webpage):
+            yield cls._get_url_from_id(content_id)
+
+
+class DacastVODIE(DacastBaseIE):
+    _URL_TYPE = 'vod'
+    _TESTS = [{
+        'url': 'https://iframe.dacast.com/vod/acae82153ef4d7a7344ae4eaa86af534/1c6143e3-5a06-371d-8695-19b96ea49090',
+        'info_dict': {
+            'id': '1c6143e3-5a06-371d-8695-19b96ea49090',
+            'ext': 'mp4',
+            'uploader_id': 'acae82153ef4d7a7344ae4eaa86af534',
+            'title': '2_4||Adnexal mass characterisation: O-RADS US and MRI||N. Bharwani, London/UK',
+            'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
+        'info_dict': {
+            'id': 'b6674869-f08a-23c5-1d7b-81f5309e1a90',
+            'ext': 'mp4',
+            'title': '4-HowToEmbedVideo.mp4',
+            'uploader_id': '3b67c4a9-3886-4eb1-d0eb-39b23b14bef3',
+            'thumbnail': 'https://universe-files.dacast.com/d26ab48f-a52a-8783-c42e-a90290ba06b6.png',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://gist.githubusercontent.com/bashonly/4ad249ef2910346fbdf3809b220f11ee/raw/87349778d4af1a80b1fcc3beb9c88108de5858f5/dacast_embeds.html',
+        'info_dict': {
+            'id': 'e7df418e-a83b-7a7f-7b5e-1a667981e8fa',
+            'ext': 'mp4',
+            'title': 'Evening Service 2-5-23',
+            'uploader_id': '943bb1ab3c03695ba85330d92d6d226e',
+            'thumbnail': 'https://universe-files.dacast.com/337472b3-e92c-2ea4-7eb7-5700da477f67',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+        query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
+        info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
+        access = self._download_json(
+            'https://playback.dacast.com/content/access', video_id,
+            note='Downloading access JSON', query=query, expected_status=403)
+
+        error = access.get('error')
+        if error in ('Broadcaster has been blocked', 'Content is offline'):
+            raise ExtractorError(error, expected=True)
+        elif error:
+            raise ExtractorError(f'Dacast API says "{error}"')
+
+        hls_url = access['hls']
+        hls_aes = {}
+
+        if 'DRM_EXT' in hls_url:
+            self.report_drm(video_id)
+        elif '/uspaes/' in hls_url:
+            # From https://player.dacast.com/js/player.js
+            ts = int(time.time())
+            signature = hashlib.sha1(
+                f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw'.encode()).digest().hex()
+            hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
+
+        for retry in self.RetryManager():
+            try:
+                formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
+            except ExtractorError as e:
+                # CDN will randomly respond with 403
+                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
+                    retry.error = e
+                    continue
+                raise
+
+        return {
+            'id': video_id,
+            'uploader_id': user_id,
+            'formats': formats,
+            'hls_aes': hls_aes or None,
+            **traverse_obj(info, ('contentInfo', {
+                'title': 'title',
+                'duration': ('duration', {float_or_none}),
+                'thumbnail': ('thumbnailUrl', {url_or_none}),
+            })),
+        }
+
+
+class DacastPlaylistIE(DacastBaseIE):
+    _URL_TYPE = 'playlist'
+    _TESTS = [{
+        'url': 'https://iframe.dacast.com/playlist/943bb1ab3c03695ba85330d92d6d226e/b632eb053cac17a9c9a02bcfc827f2d8',
+        'playlist_mincount': 28,
+        'info_dict': {
+            'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
+            'title': 'Archive Sermons',
+        },
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://gist.githubusercontent.com/bashonly/7efb606f49f3c6e07ea0327de5a661d1/raw/05a16eac830245ea301fb0a585023bec71e6093c/dacast_playlist_embed.html',
+        'playlist_mincount': 28,
+        'info_dict': {
+            'id': 'b632eb053cac17a9c9a02bcfc827f2d8',
+            'title': 'Archive Sermons',
+        },
+    }]
+
+    def _real_extract(self, url):
+        user_id, playlist_id = self._match_valid_url(url).group('user_id', 'id')
+        info = self._download_json(
+            self._API_INFO_URL, playlist_id, note='Downloading playlist JSON', query={
+                'contentId': f'{user_id}-playlist-{playlist_id}',
+                'provider': 'universe',
+            })['contentInfo']
+
+        def entries(info):
+            for video in traverse_obj(info, ('features', 'playlist', 'contents', lambda _, v: v['id'])):
+                yield self.url_result(
+                    DacastVODIE._get_url_from_id(video['id']), DacastVODIE, video['id'], video.get('title'))
+
+        return self.playlist_result(entries(info), playlist_id, info.get('title'))
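
(Aside: the notable trick in the new Dacast extractor is the time-based signing of the HLS AES key URL. A standalone sketch of that scheme, with the offset and secret string taken verbatim from the code above; hexdigest() is equivalent to the digest().hex() spelling, and sha1 hashes bytes, hence the encode().)

    import hashlib
    import time

    def dacast_key_url(video_id):
        # Signing scheme mirrored from player.js, as referenced above
        ts = int(time.time())
        signature = hashlib.sha1(
            f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw'.encode()).hexdigest()
        return f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'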
yt_dlp/extractor/daftsex.py
@@ -1,6 +1,7 @@
 from .common import InfoExtractor
 from ..compat import compat_b64decode
 from ..utils import (
+    ExtractorError,
     int_or_none,
     js_to_json,
     parse_count,
@@ -12,21 +13,24 @@
 
 
 class DaftsexIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P<id>-?\d+_\d+)'
+    _VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)'
     _TESTS = [{
-        'url': 'https://daftsex.com/watch/-35370899_456246186',
-        'md5': 'd95135e6cea2d905bea20dbe82cda64a',
+        'url': 'https://daft.sex/watch/-35370899_456246186',
+        'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd',
         'info_dict': {
             'id': '-35370899_456246186',
             'ext': 'mp4',
             'title': 'just relaxing',
-            'description': 'just relaxing - Watch video Watch video in high quality',
+            'description': 'just relaxing – Watch video Watch video in high quality',
             'upload_date': '20201113',
             'timestamp': 1605261911,
-            'thumbnail': r're:https://[^/]+/impf/-43BuMDIawmBGr3GLcZ93CYwWf2PBv_tVWoS1A/dnu41DnARU4\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=6af2c26ff4a45e55334189301c867384&type=video_thumb',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'age_limit': 18,
+            'duration': 15.0,
+            'view_count': int
         },
     }, {
-        'url': 'https://daftsex.com/watch/-156601359_456242791',
+        'url': 'https://daft.sex/watch/-156601359_456242791',
         'info_dict': {
             'id': '-156601359_456242791',
             'ext': 'mp4',
@@ -36,6 +40,7 @@ class DaftsexIE(InfoExtractor):
             'timestamp': 1600250735,
             'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
         },
+        'skip': 'deleted / private'
     }]
 
     def _real_extract(self, url):
@@ -60,7 +65,7 @@ def _real_extract(self, url):
             webpage, 'player color', fatal=False) or ''
 
         embed_page = self._download_webpage(
-            'https://daxab.com/player/%s?color=%s' % (player_hash, player_color),
+            'https://dxb.to/player/%s?color=%s' % (player_hash, player_color),
             video_id, headers={'Referer': url})
         video_params = self._parse_json(
             self._search_regex(
@@ -94,15 +99,19 @@ def _real_extract(self, url):
             'age_limit': 18,
         }
 
-        item = self._download_json(
+        items = self._download_json(
            f'{server_domain}/method/video.get/{video_id}', video_id,
            headers={'Referer': url}, query={
                'token': video_params['video']['access_token'],
                'videos': video_id,
                'ckey': video_params['c_key'],
                'credentials': video_params['video']['credentials'],
-            })['response']['items'][0]
+            })['response']['items']
+
+        if not items:
+            raise ExtractorError('Video is not available', video_id=video_id, expected=True)
+
+        item = items[0]
         formats = []
         for f_id, f_url in item.get('files', {}).items():
             if f_id == 'external':
yt_dlp/extractor/digitalconcerthall.py
@@ -11,7 +11,7 @@
 
 class DigitalConcertHallIE(InfoExtractor):
     IE_DESC = 'DigitalConcertHall extractor'
-    _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/concert/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
     _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
     _ACCESS_TOKEN = None
     _NETRC_MACHINE = 'digitalconcerthall'
@@ -40,6 +40,19 @@ class DigitalConcertHallIE(InfoExtractor):
         },
         'params': {'skip_download': 'm3u8'},
         'playlist_count': 3,
+    }, {
+        'url': 'https://www.digitalconcerthall.com/en/film/388',
+        'info_dict': {
+            'id': '388',
+            'ext': 'mp4',
+            'title': 'The Berliner Philharmoniker and Frank Peter Zimmermann',
+            'description': 'md5:cfe25a7044fa4be13743e5089b5b5eb2',
+            'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
+            'upload_date': '20220714',
+            'timestamp': 1657785600,
+            'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
+        },
+        'params': {'skip_download': 'm3u8'},
     }]
 
     def _perform_login(self, username, password):
@@ -75,7 +88,7 @@ def _real_initialize(self):
         if not self._ACCESS_TOKEN:
             self.raise_login_required(method='password')
 
-    def _entries(self, items, language, **kwargs):
+    def _entries(self, items, language, type_, **kwargs):
         for item in items:
             video_id = item['id']
             stream_info = self._download_json(
@@ -103,11 +116,11 @@ def _entries(self, items, language, **kwargs):
                     'start_time': chapter.get('time'),
                     'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
                     'title': chapter.get('text'),
-                } for chapter in item['cuepoints']] if item.get('cuepoints') else None,
+                } for chapter in item['cuepoints']] if item.get('cuepoints') and type_ == 'concert' else None,
             }
 
     def _real_extract(self, url):
-        language, video_id = self._match_valid_url(url).group('language', 'id')
+        language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
         if not language:
             language = 'en'
 
@@ -120,18 +133,18 @@ def _real_extract(self, url):
         }]
 
         vid_info = self._download_json(
-            f'https://api.digitalconcerthall.com/v2/concert/{video_id}', video_id, headers={
+            f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
                 'Accept': 'application/json',
                 'Accept-Language': language
             })
         album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
+        videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
 
         return {
             '_type': 'playlist',
             'id': video_id,
             'title': vid_info.get('title'),
-            'entries': self._entries(traverse_obj(vid_info, ('_embedded', ..., ...)), language,
-                                     thumbnails=thumbnails, album_artist=album_artist),
+            'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
             'thumbnails': thumbnails,
             'album_artist': album_artist,
         }
yt_dlp/extractor/elevensports.py (new file, 59 lines)
@@ -0,0 +1,59 @@
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class ElevenSportsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?elevensports\.com/view/event/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://elevensports.com/view/event/clf46yr3kenn80jgrqsjmwefk',
+        'md5': 'c0958d9ff90e4503a75544358758921d',
+        'info_dict': {
+            'id': 'clf46yr3kenn80jgrqsjmwefk',
+            'title': 'Cleveland SC vs Lionsbridge FC',
+            'ext': 'mp4',
+            'description': 'md5:03b5238d6549f4ea1fddadf69b5e0b58',
+            'upload_date': '20230323',
+            'timestamp': 1679612400,
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'params': {'skip_download': 'm3u8'}
+    }, {
+        'url': 'https://elevensports.com/view/event/clhpyd53b06160jez74qhgkmf',
+        'md5': 'c0958d9ff90e4503a75544358758921d',
+        'info_dict': {
+            'id': 'clhpyd53b06160jez74qhgkmf',
+            'title': 'AJNLF vs ARRAF',
+            'ext': 'mp4',
+            'description': 'md5:c8c5e75c78f37c6d15cd6c475e43a8c1',
+            'upload_date': '20230521',
+            'timestamp': 1684684800,
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+        },
+        'params': {'skip_download': 'm3u8'}
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        event_id = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['event']['mclsEventId']
+        event_data = self._download_json(
+            f'https://mcls-api.mycujoo.tv/bff/events/v1beta1/{event_id}', video_id,
+            headers={'Authorization': 'Bearer FBVKACGN37JQC5SFA0OVK8KKSIOP153G'})
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            event_data['streams'][0]['full_url'], video_id, 'mp4', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(event_data, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'timestamp': ('start_time', {parse_iso8601}),
+                'thumbnail': ('thumbnail_url', {url_or_none}),
+            }),
+        }
yt_dlp/extractor/europarl.py
@@ -6,6 +6,7 @@
     parse_iso8601,
     parse_qs,
     qualities,
+    traverse_obj,
     unified_strdate,
     xpath_text
 )
@@ -92,42 +93,17 @@ def get_item(type_, preference):
 
 
 class EuroParlWebstreamIE(InfoExtractor):
     _VALID_URL = r'''(?x)
-        https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/
-        (?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+        https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
+        (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
     '''
     _TESTS = [{
         'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
         'info_dict': {
-            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
-            'ext': 'mp4',
-            'release_timestamp': 1663137900,
-            'title': 'Plenary session',
-            'release_date': '20220914',
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }, {
-        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER',
-        'info_dict': {
-            'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c',
-            'ext': 'mp4',
-            'release_timestamp': 1668434400,
-            'release_date': '20221114',
-            'title': 'md5:d3550280c33cc70e0678652e3d52c028',
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }, {
-        # embed webpage
-        'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true',
-        'info_dict': {
-            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
+            'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
             'ext': 'mp4',
             'title': 'Plenary session',
+            'release_timestamp': 1663139069,
             'release_date': '20220914',
-            'release_timestamp': 1663137900,
         },
         'params': {
             'skip_download': True,
@@ -144,30 +120,54 @@ class EuroParlWebstreamIE(InfoExtractor):
             'live_status': 'is_live',
         },
         'skip': 'not live anymore'
+    }, {
+        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
+        'info_dict': {
+            'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+            'ext': 'mp4',
+            'release_date': '20230301',
+            'title': 'Committee on Culture and Education',
+            'release_timestamp': 1677666641,
+        }
+    }, {
+        # live stream
+        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI',
+        'info_dict': {
+            'id': 'e4255f56-10aa-4b3c-6530-08db56d5b0d9',
+            'ext': 'mp4',
+            'release_date': '20230524',
+            'title': r're:Committee on Environment, Public Health and Food Safety \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}',
+            'release_timestamp': 1684911541,
+            'live_status': 'is_live',
+        },
+        'skip': 'Not live anymore'
     }]
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        webpage_nextjs = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
 
         json_info = self._download_json(
-            'https://vis-api.vuplay.co.uk/event/external', display_id,
+            'https://acs-api.europarl.connectedviews.eu/api/FullMeeting', display_id,
             query={
-                'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c',
-                'external_id': display_id,
+                'api-version': 1.0,
+                'tenantId': 'bae646ca-1fc8-4363-80ba-2c04f06b4968',
+                'externalReference': display_id
             })
 
-        formats, subtitles = self._extract_mpd_formats_and_subtitles(json_info['streaming_url'], display_id)
-        fmts, subs = self._extract_m3u8_formats_and_subtitles(
-            json_info['streaming_url'].replace('.mpd', '.m3u8'), display_id)
-
-        formats.extend(fmts)
-        self._merge_subtitles(subs, target=subtitles)
+        formats, subtitles = [], {}
+        for hls_url in traverse_obj(json_info, ((('meetingVideo'), ('meetingVideos', ...)), 'hlsUrl')):
+            fmt, subs = self._extract_m3u8_formats_and_subtitles(hls_url, display_id)
+            formats.extend(fmt)
+            self._merge_subtitles(subs, target=subtitles)
 
         return {
             'id': json_info['id'],
-            'title': json_info.get('title'),
+            'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
             'formats': formats,
             'subtitles': subtitles,
-            'release_timestamp': parse_iso8601(json_info.get('published_start')),
-            'is_live': 'LIVE' in json_info.get('state', '')
+            'release_timestamp': parse_iso8601(json_info.get('startDateTime')),
+            'is_live': traverse_obj(webpage_nextjs, ('mediaItem', 'mediaSubType')) == 'Live'
         }
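
(Aside: the new EuroParl endpoint can be exercised on its own. A hedged sketch using only the query parameters visible in the diff; the externalReference value is one of the test IDs above, and field names like 'meetingVideos' come from the traversal in the code.)

    import json
    import urllib.parse
    import urllib.request

    query = urllib.parse.urlencode({
        'api-version': '1.0',
        'tenantId': 'bae646ca-1fc8-4363-80ba-2c04f06b4968',
        'externalReference': '20220914-0900-PLENARY',
    })
    with urllib.request.urlopen(
            f'https://acs-api.europarl.connectedviews.eu/api/FullMeeting?{query}') as resp:
        meeting = json.load(resp)
    # Multi-part meetings use 'meetingVideos'; single VODs use 'meetingVideo'
    hls_urls = [v.get('hlsUrl') for v in meeting.get('meetingVideos', [])]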
yt_dlp/extractor/eurosport.py
@@ -3,7 +3,7 @@
 
 
 class EurosportIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.eurosport\.com/\w+/[\w-]+/\d+/[\w-]+_(?P<id>vid\d+)'
+    _VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)'
     _TESTS = [{
         'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
         'info_dict': {
@@ -44,6 +44,32 @@ class EurosportIE(InfoExtractor):
             'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
             'upload_date': '20220727',
         }
+    }, {
+        'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml',
+        'info_dict': {
+            'id': '3096477',
+            'ext': 'mp4',
+            'title': 'md5:82edc17370124c7a19b3cf518517583b',
+            'duration': 84.0,
+            'description': 'md5:b3f44ef7f5b5b95b24a273b163083feb',
+            'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/04/12/3682873-74947393-2560-1440.jpg',
+            'timestamp': 1681292028,
+            'upload_date': '20230412',
+            'display_id': 'vid1896254',
+        }
+    }, {
+        'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml',
+        'info_dict': {
+            'id': '3149108',
+            'ext': 'mp4',
+            'title': '\'Last year\'s semi-final pain was still there\' - Pep Guardiola after Man City reach CL final',
+            'description': 'md5:89ef142fe0170a66abab77fac2955d8e',
+            'display_id': 'vid1914115',
+            'timestamp': 1684403618,
+            'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg',
+            'duration': 105.0,
+            'upload_date': '20230518',
+        }
     }]
 
     _TOKEN = None
yt_dlp/extractor/facebook.py
@@ -390,7 +390,10 @@ def extract_metadata(webpage):
                 k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
             title = get_first(media, ('title', 'text'))
             description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
-            uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
+            uploader_data = (
+                get_first(media, ('owner', {dict}))
+                or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
+                or get_first(post, ('node', 'actors', ..., {dict})) or {})
 
             page_title = title or self._html_search_regex((
                 r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
@@ -415,16 +418,17 @@ def extract_metadata(webpage):
             # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
             if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
                 thumbnail = None
-            view_count = parse_count(self._search_regex(
-                r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-                default=None))
             info_dict = {
                 'description': description,
                 'uploader': uploader,
                 'uploader_id': uploader_data.get('id'),
                 'timestamp': timestamp,
                 'thumbnail': thumbnail,
-                'view_count': view_count,
+                'view_count': parse_count(self._search_regex(
+                    (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',),
+                    webpage, 'view count', default=None)),
+                'concurrent_view_count': get_first(post, (
+                    ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
             }
 
             info_json_ld = self._search_json_ld(webpage, video_id, default={})
yt_dlp/extractor/idolplus.py (new file, 115 lines)
@@ -0,0 +1,115 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj, try_call, url_or_none
+
+
+class IdolPlusIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?idolplus\.com/z[us]/(?:concert/|contents/?\?(?:[^#]+&)?albumId=)(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://idolplus.com/zs/contents?albumId=M012077298PPV00',
+        'md5': '2ace3f4661c943a2f7e79f0b88cea1e7',
+        'info_dict': {
+            'id': 'M012077298PPV00',
+            'ext': 'mp4',
+            'title': '[MultiCam] Aegyo on Top of Aegyo (IZ*ONE EATING TRIP)',
+            'release_date': '20200707',
+            'formats': 'count:65',
+        },
+        'params': {'format': '532-KIM_MINJU'},
+    }, {
+        'url': 'https://idolplus.com/zs/contents?albumId=M01232H058PPV00&catId=E9TX5',
+        'info_dict': {
+            'id': 'M01232H058PPV00',
+            'ext': 'mp4',
+            'title': 'YENA (CIRCLE CHART MUSIC AWARDS 2022 RED CARPET)',
+            'release_date': '20230218',
+            'formats': 'count:5',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        # live stream
+        'url': 'https://idolplus.com/zu/contents?albumId=M012323174PPV00',
+        'info_dict': {
+            'id': 'M012323174PPV00',
+            'ext': 'mp4',
+            'title': 'Hanteo Music Awards 2022 DAY2',
+            'release_date': '20230211',
+            'formats': 'count:5',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://idolplus.com/zs/concert/M012323039PPV00',
+        'info_dict': {
+            'id': 'M012323039PPV00',
+            'ext': 'mp4',
+            'title': 'CIRCLE CHART MUSIC AWARDS 2022',
+            'release_date': '20230218',
+            'formats': 'count:5',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        data_list = traverse_obj(self._download_json(
+            'https://idolplus.com/api/zs/viewdata/ruleset/build', video_id,
+            headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
+                'rulesetId': 'contents',
+                'albumId': video_id,
+                'distribute': 'PRD',
+                'loggedIn': 'false',
+                'region': 'zs',
+                'countryGroup': '00010',
+                'lang': 'en',
+                'saId': '999999999998',
+            }), ('data', 'viewData', ...))
+
+        player_data = {}
+        while data_list:
+            player_data = data_list.pop()
+            if traverse_obj(player_data, 'type') == 'player':
+                break
+            elif traverse_obj(player_data, ('dataList', ...)):
+                data_list += player_data['dataList']
+
+        formats = self._extract_m3u8_formats(traverse_obj(player_data, (
+            'vodPlayerList', 'vodProfile', 0, 'vodServer', 0, 'video_url', {url_or_none})), video_id)
+
+        subtitles = {}
+        for caption in traverse_obj(player_data, ('vodPlayerList', 'caption')) or []:
+            subtitles.setdefault(caption.get('lang') or 'und', []).append({
+                'url': caption.get('smi_url'),
+                'ext': 'vtt',
+            })
+
+        # Add member multicams as alternative formats
+        if (traverse_obj(player_data, ('detail', 'has_cuesheet')) == 'Y'
+                and traverse_obj(player_data, ('detail', 'is_omni_member')) == 'Y'):
+            cuesheet = traverse_obj(self._download_json(
+                'https://idolplus.com/gapi/contents/v1.0/content/cuesheet', video_id,
+                'Downloading JSON metadata for member multicams',
+                headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
+                    'ALBUM_ID': video_id,
+                    'COUNTRY_GRP': '00010',
+                    'LANG': 'en',
+                    'SA_ID': '999999999998',
+                    'COUNTRY_CODE': 'KR',
+                }), ('data', 'cuesheet_item', 0))
+
+            for member in traverse_obj(cuesheet, ('members', ...)):
+                index = try_call(lambda: int(member['omni_view_index']) - 1)
+                member_video_url = traverse_obj(cuesheet, ('omni_view', index, 'cdn_url', 0, 'url', {url_or_none}))
+                if not member_video_url:
+                    continue
+                member_formats = self._extract_m3u8_formats(
+                    member_video_url, video_id, note=f'Downloading m3u8 for multicam {member["name"]}')
+                for mf in member_formats:
+                    mf['format_id'] = f'{mf["format_id"]}-{member["name"].replace(" ", "_")}'
+                formats.extend(member_formats)
+
+        return {
+            'id': video_id,
+            'title': traverse_obj(player_data, ('detail', 'albumName')),
+            'formats': formats,
+            'subtitles': subtitles,
+            'release_date': traverse_obj(player_data, ('detail', 'broadcastDate')),
+        }
yt_dlp/extractor/iwara.py
@@ -1,6 +1,7 @@
 import functools
 import urllib.parse
 import hashlib
+import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -14,7 +15,49 @@
 )
 
 
-class IwaraIE(InfoExtractor):
+# https://github.com/yt-dlp/yt-dlp/issues/6671
+class IwaraBaseIE(InfoExtractor):
+    _USERTOKEN = None
+    _MEDIATOKEN = None
+    _NETRC_MACHINE = 'iwara'
+
+    def _get_user_token(self, invalidate=False):
+        if not invalidate and self._USERTOKEN:
+            return self._USERTOKEN
+
+        username, password = self._get_login_info()
+        IwaraBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
+        if not IwaraBaseIE._USERTOKEN or invalidate:
+            IwaraBaseIE._USERTOKEN = self._download_json(
+                'https://api.iwara.tv/user/login', None, note='Logging in',
+                data=json.dumps({
+                    'email': username,
+                    'password': password
+                }).encode('utf-8'),
+                headers={
+                    'Content-Type': 'application/json'
+                })['token']
+
+            self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
+
+        return self._USERTOKEN
+
+    def _get_media_token(self, invalidate=False):
+        if not invalidate and self._MEDIATOKEN:
+            return self._MEDIATOKEN
+
+        IwaraBaseIE._MEDIATOKEN = self._download_json(
+            'https://api.iwara.tv/user/token', None, note='Fetching media token',
+            data=b'',  # Need to have some data here, even if it's empty
+            headers={
+                'Authorization': f'Bearer {self._get_user_token()}',
+                'Content-Type': 'application/json'
+            })['accessToken']
+
+        return self._MEDIATOKEN
+
+
+class IwaraIE(IwaraBaseIE):
     IE_NAME = 'iwara'
     _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
     _TESTS = [{
@@ -56,6 +99,26 @@ class IwaraIE(InfoExtractor):
             'timestamp': 1678732213,
             'modified_timestamp': 1679110271,
         },
+    }, {
+        'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
+        'info_dict': {
+            'id': 'blggmfno8ghl725bg',
+            'ext': 'mp4',
+            'age_limit': 18,
+            'title': 'お外でおしっこしちゃう猫耳ロリメイド',
+            'description': 'md5:0342ba9bf6db09edbbb28729657c3611',
+            'uploader': 'Fe_Kurosabi',
+            'uploader_id': 'fekurosabi',
+            'tags': [
+                'pee'
+            ],
+            'like_count': 192,
+            'view_count': 12119,
+            'comment_count': 0,
+            'timestamp': 1598880567,
+            'modified_timestamp': 1598908995,
+            'availability': 'needs_auth',
+        },
     }]
 
     def _extract_formats(self, video_id, fileurl):
@@ -79,12 +142,18 @@ def _extract_formats(self, video_id, fileurl):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True)
+        username, password = self._get_login_info()
+        headers = {
+            'Authorization': f'Bearer {self._get_media_token()}',
+        } if username and password else None
+        video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True, headers=headers)
         errmsg = video_data.get('message')
         # at this point we can actually get uploaded user info, but do we need it?
         if errmsg == 'errors.privateVideo':
             self.raise_login_required('Private video. Login if you have permissions to watch')
-        elif errmsg:
+        elif errmsg == 'errors.notFound' and not username:
+            self.raise_login_required('Video may need login to view')
+        elif errmsg:  # None if success
             raise ExtractorError(f'Iwara says: {errmsg}')
 
         if not video_data.get('fileUrl'):
@@ -112,8 +181,17 @@ def _real_extract(self, url):
             'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
         }
 
+    def _perform_login(self, username, password):
+        if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
+            self.write_debug('Skipping logging in')
+            return
+
+        IwaraBaseIE._USERTOKEN = self._get_user_token(True)
+        self._get_media_token(True)
+        self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
+
 
-class IwaraUserIE(InfoExtractor):
+class IwaraUserIE(IwaraBaseIE):
     _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
     IE_NAME = 'iwara:user'
     _PER_PAGE = 32
@@ -165,7 +243,7 @@ def _real_extract(self, url):
             playlist_id, traverse_obj(user_info, ('user', 'name')))
 
 
-class IwaraPlaylistIE(InfoExtractor):
+class IwaraPlaylistIE(IwaraBaseIE):
     # the ID is an UUID but I don't think it's necessary to write concrete regex
     _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
     IE_NAME = 'iwara:playlist'
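
(Aside: the Iwara change introduces a two-step token exchange, where the long-lived login token from /user/login is traded for a short-lived access token at /user/token. A minimal standalone sketch of that flow, endpoints as in the diff, without the caching or error handling the extractor adds.)

    import json
    import urllib.request

    def iwara_media_token(email, password):
        # Step 1: credentials -> login token
        req = urllib.request.Request(
            'https://api.iwara.tv/user/login',
            data=json.dumps({'email': email, 'password': password}).encode(),
            headers={'Content-Type': 'application/json'})
        user_token = json.load(urllib.request.urlopen(req))['token']
        # Step 2: login token -> media (access) token; POST body must be non-None
        req = urllib.request.Request(
            'https://api.iwara.tv/user/token', data=b'',
            headers={'Authorization': f'Bearer {user_token}',
                     'Content-Type': 'application/json'})
        return json.load(urllib.request.urlopen(req))['accessToken']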
yt_dlp/extractor/jstream.py (new file, 73 lines)
@@ -0,0 +1,73 @@
+import base64
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    js_to_json,
+    remove_start,
+)
+
+
+class JStreamIE(InfoExtractor):
+    # group "id" only exists for compliance, not directly used in requests
+    # also all components are mandatory
+    _VALID_URL = r'jstream:(?P<host>www\d+):(?P<id>(?P<publisher>[a-z0-9]+):(?P<mid>\d+))'
+
+    _TESTS = [{
+        'url': 'jstream:www50:eqd638pvwx:752',
+        'info_dict': {
+            'id': 'eqd638pvwx:752',
+            'ext': 'mp4',
+            'title': '阪神淡路大震災 激震の記録2020年版 解説動画',
+            'duration': 672,
+            'thumbnail': r're:https?://eqd638pvwx\.eq\.webcdn\.stream\.ne\.jp/.+\.jpg',
+        },
+    }]
+
+    def _parse_jsonp(self, callback, string, video_id):
+        return self._search_json(rf'\s*{re.escape(callback)}\s*\(', string, callback, video_id)
+
+    def _find_formats(self, video_id, movie_list_hls, host, publisher, subtitles):
+        for value in movie_list_hls:
+            text = value.get('text') or ''
+            if not text.startswith('auto'):
+                continue
+            m3u8_id = remove_start(remove_start(text, 'auto'), '_') or None
+            fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/{value.get("url")}', video_id, 'mp4', m3u8_id=m3u8_id)
+            self._merge_subtitles(subs, target=subtitles)
+            yield from fmts
+
+    def _real_extract(self, url):
+        host, publisher, mid, video_id = self._match_valid_url(url).group('host', 'publisher', 'mid', 'id')
+        video_info_jsonp = self._download_webpage(
+            f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/eq_meta/v1/{mid}.jsonp',
+            video_id, 'Requesting video info')
+        video_info = self._parse_jsonp('metaDataResult', video_info_jsonp, video_id)['movie']
+        subtitles = {}
+        formats = list(self._find_formats(video_id, video_info.get('movie_list_hls'), host, publisher, subtitles))
+        self._remove_duplicate_formats(formats)
+        return {
+            'id': video_id,
+            'title': video_info.get('title'),
+            'duration': float_or_none(video_info.get('duration')),
+            'thumbnail': video_info.get('thumbnail_url'),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        # check for eligiblity of webpage
+        # https://support.eq.stream.co.jp/hc/ja/articles/115008388147-%E3%83%97%E3%83%AC%E3%82%A4%E3%83%A4%E3%83%BCAPI%E3%81%AE%E3%82%B5%E3%83%B3%E3%83%97%E3%83%AB%E3%82%B3%E3%83%BC%E3%83%89
+        script_tag = re.search(r'<script\s*[^>]+?src="https://ssl-cache\.stream\.ne\.jp/(?P<host>www\d+)/(?P<publisher>[a-z0-9]+)/[^"]+?/if\.js"', webpage)
+        if not script_tag:
+            return
+        host, publisher = script_tag.groups()
+        for m in re.finditer(r'(?s)PlayerFactoryIF\.create\(\s*({[^\}]+?})\s*\)\s*;', webpage):
+            # TODO: using json.loads here as InfoExtractor._parse_json is not classmethod
+            info = json.loads(js_to_json(m.group(1)))
+            mid = base64.b64decode(info.get('m')).decode()
+            yield f'jstream:{host}:{publisher}:{mid}'
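
(Aside: the embed detection above decodes the base64 'm' argument of PlayerFactoryIF.create() into the media ID of a jstream: pseudo-URL. A tiny illustration, borrowing the host and publisher values from the test case.)

    import base64

    info = {'m': base64.b64encode(b'752').decode()}  # as embedded in the page's JS
    mid = base64.b64decode(info['m']).decode()
    print(f'jstream:www50:eqd638pvwx:{mid}')  # -> jstream:www50:eqd638pvwx:752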
yt_dlp/extractor/ketnet.py (file deleted)
@@ -1,70 +0,0 @@
-from .canvas import CanvasIE
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-from ..utils import (
-    int_or_none,
-    parse_iso8601,
-)
-
-
-class KetnetIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
-        'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
-        'info_dict': {
-            'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
-            'ext': 'mp4',
-            'title': 'Nachtwacht - Reeks 3: Aflevering 1',
-            'description': 'De Nachtwacht krijgt te maken met een parasiet',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 1468.02,
-            'timestamp': 1609225200,
-            'upload_date': '20201229',
-            'series': 'Nachtwacht',
-            'season': 'Reeks 3',
-            'episode': 'De Greystook',
-            'episode_number': 1,
-        },
-        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
-    }, {
-        'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-
-        video = self._download_json(
-            'https://senior-bff.ketnet.be/graphql', display_id, query={
-                'query': '''{
-  video(id: "content/ketnet/nl/%s.model.json") {
-    description
-    episodeNr
-    imageUrl
-    mediaReference
-    programTitle
-    publicationDate
-    seasonTitle
-    subtitleVideodetail
-    titleVideodetail
-  }
-}''' % display_id,
-            })['data']['video']
-
-        mz_id = compat_urllib_parse_unquote(video['mediaReference'])
-
-        return {
-            '_type': 'url_transparent',
-            'id': mz_id,
-            'title': video['titleVideodetail'],
-            'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
-            'thumbnail': video.get('imageUrl'),
-            'description': video.get('description'),
-            'timestamp': parse_iso8601(video.get('publicationDate')),
-            'series': video.get('programTitle'),
-            'season': video.get('seasonTitle'),
-            'episode': video.get('subtitleVideodetail'),
-            'episode_number': int_or_none(video.get('episodeNr')),
-            'ie_key': CanvasIE.ie_key(),
-        }
@@ -4,8 +4,8 @@
 from ..utils import (
     ExtractorError,
     int_or_none,
-    traverse_obj,
     smuggle_url,
+    traverse_obj,
     unsmuggle_url,
 )
@@ -113,7 +113,7 @@ def _real_extract(self, url):
             entry_protocol='m3u8_native', m3u8_id='hls')
         for a_format in formats:
             # LiTV HLS segments doesn't like compressions
-            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
+            a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'

         title = program_info['title'] + program_info.get('secondaryMark', '')
         description = program_info.get('description')
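A note on the header swap above: `Youtubedl-no-compression` was an internal pseudo-header understood only by the downloader, while `Accept-Encoding: identity` is the standard HTTP way to ask a server not to compress the response. A tiny sketch (placeholder URL; nothing is actually fetched):

    import urllib.request

    # Ask the server for an uncompressed segment response (hypothetical URL).
    req = urllib.request.Request(
        'https://example.com/segment.ts',
        headers={'Accept-Encoding': 'identity'})
    print(req.get_header('Accept-encoding'))  # -> identity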
@@ -1,33 +1,36 @@
-import re
 import itertools
+import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
+from ..compat import compat_str, compat_urlparse
 from ..utils import (
-    find_xpath_attr,
-    xpath_attr,
-    xpath_with_ns,
-    xpath_text,
-    orderedSet,
-    update_url_query,
-    int_or_none,
-    float_or_none,
-    parse_iso8601,
     determine_ext,
+    find_xpath_attr,
+    float_or_none,
+    int_or_none,
+    orderedSet,
+    parse_iso8601,
+    traverse_obj,
+    update_url_query,
+    xpath_attr,
+    xpath_text,
+    xpath_with_ns,
 )


 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
-    _VALID_URL = r'https?://(?:new\.)?livestream\.com/(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))/(?:events/(?P<event_id>\d+)|(?P<event_name>[^/]+))(?:/videos/(?P<id>\d+))?'
+    _VALID_URL = r'''(?x)
+        https?://(?:new\.)?livestream\.com/
+        (?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))
+        (?:/events/(?P<event_id>\d+)|/(?P<event_name>[^/]+))?
+        (?:/videos/(?P<id>\d+))?
+        '''
     _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']

     _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
-        'md5': '53274c76ba7754fb0e8d072716f2292b',
+        'md5': '7876c5f5dc3e711b6b73acce4aac1527',
         'info_dict': {
             'id': '4719370',
             'ext': 'mp4',
@@ -37,22 +40,37 @@ class LivestreamIE(InfoExtractor):
             'duration': 5968.0,
             'like_count': int,
             'view_count': int,
+            'comment_count': int,
             'thumbnail': r're:^http://.*\.jpg$'
         }
     }, {
-        'url': 'http://new.livestream.com/tedx/cityenglish',
+        'url': 'https://livestream.com/coheedandcambria/websterhall',
         'info_dict': {
-            'title': 'TEDCity2.0 (English)',
-            'id': '2245590',
+            'id': '1585861',
+            'title': 'Live From Webster Hall'
+        },
+        'playlist_mincount': 1,
+    }, {
+        'url': 'https://livestream.com/dayananda/events/7954027',
+        'info_dict': {
+            'title': 'Live from Mevo',
+            'id': '7954027',
         },
         'playlist_mincount': 4,
     }, {
-        'url': 'http://new.livestream.com/chess24/tatasteelchess',
+        'url': 'https://livestream.com/accounts/82',
         'info_dict': {
-            'title': 'Tata Steel Chess',
-            'id': '3705884',
-        },
-        'playlist_mincount': 60,
+            'id': '253978',
+            'view_count': int,
+            'title': 'trsr',
+            'comment_count': int,
+            'like_count': int,
+            'upload_date': '20120306',
+            'timestamp': 1331042383,
+            'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg',
+            'duration': 15.332,
+            'ext': 'mp4'
+        }
     }, {
         'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
         'only_matching': True,
@@ -179,7 +197,7 @@ def _extract_stream_info(self, stream_info):
             'is_live': is_live,
         }

-    def _extract_event(self, event_data):
+    def _generate_event_playlist(self, event_data):
         event_id = compat_str(event_data['id'])
         account_id = compat_str(event_data['owner_account_id'])
         feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json'
@@ -189,7 +207,6 @@ def _extract_event(self, event_data):
             return self._extract_stream_info(stream_info)

         last_video = None
-        entries = []
         for i in itertools.count(1):
             if last_video is None:
                 info_url = feed_root_url
@@ -197,31 +214,38 @@ def _extract_event(self, event_data):
                 info_url = '{root}?&id={id}&newer=-1&type=video'.format(
                     root=feed_root_url, id=last_video)
             videos_info = self._download_json(
-                info_url, event_id, 'Downloading page {0}'.format(i))['data']
+                info_url, event_id, f'Downloading page {i}')['data']
             videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
             if not videos_info:
                 break
             for v in videos_info:
                 v_id = compat_str(v['id'])
-                entries.append(self.url_result(
-                    'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id),
-                    'Livestream', v_id, v.get('caption')))
+                yield self.url_result(
+                    f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}',
+                    LivestreamIE, v_id, v.get('caption'))
             last_video = videos_info[-1]['id']
-        return self.playlist_result(entries, event_id, event_data['full_name'])

     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         video_id = mobj.group('id')
         event = mobj.group('event_id') or mobj.group('event_name')
         account = mobj.group('account_id') or mobj.group('account_name')
-        api_url = self._API_URL_TEMPLATE % (account, event)
+        api_url = f'http://livestream.com/api/accounts/{account}'

         if video_id:
             video_data = self._download_json(
-                api_url + '/videos/%s' % video_id, video_id)
+                f'{api_url}/events/{event}/videos/{video_id}', video_id)
             return self._extract_video_info(video_data)
-        else:
-            event_data = self._download_json(api_url, video_id)
-            return self._extract_event(event_data)
+        elif event:
+            event_data = self._download_json(f'{api_url}/events/{event}', None)
+            return self.playlist_result(
+                self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name'])
+
+        account_data = self._download_json(api_url, None)
+        items = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...))
+        return self.playlist_result(
+            itertools.chain.from_iterable(map(self._generate_event_playlist, items)),
+            account_data.get('id'), account_data.get('full_name'))


 # The original version of Livestream uses a different system
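The refactor above turns the list-building `_extract_event` into the generator `_generate_event_playlist`, so account pages can chain several event playlists lazily. The cursor-pagination pattern it uses, reduced to a self-contained sketch (the toy `fetch_page` and its data stand in for the feed.json API):

    import itertools

    def paged_items(fetch_page):
        # Keep requesting items newer than the last seen ID until a page is
        # empty, mirroring the feed walk in _generate_event_playlist.
        last_id = None
        for page_num in itertools.count(1):
            items = fetch_page(last_id, page_num)
            if not items:
                break
            yield from items
            last_id = items[-1]['id']

    pages = [[{'id': 1}, {'id': 2}], [{'id': 3}, {'id': 4}], []]  # invented data
    print(list(paged_items(lambda last_id, page_num: pages[page_num - 1])))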
92 yt_dlp/extractor/mzaalo.py Normal file
@@ -0,0 +1,92 @@
+from .common import InfoExtractor
+from ..utils import (
+    parse_age_limit,
+    parse_duration,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class MzaaloIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mzaalo\.com/play/(?P<type>movie|original|clip)/(?P<id>[a-fA-F0-9-]+)/[\w-]+'
+    _TESTS = [{
+        # Movies
+        'url': 'https://www.mzaalo.com/play/movie/c0958d9f-f90e-4503-a755-44358758921d/Jamun',
+        'info_dict': {
+            'id': 'c0958d9f-f90e-4503-a755-44358758921d',
+            'title': 'Jamun',
+            'ext': 'mp4',
+            'description': 'md5:24fe9ebb9bbe5b36f7b54b90ab1e2f31',
+            'thumbnails': 'count:15',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 5527.0,
+            'language': 'hin',
+            'categories': ['Drama'],
+            'age_limit': 13,
+        },
+        'params': {'skip_download': 'm3u8'}
+    }, {
+        # Shows
+        'url': 'https://www.mzaalo.com/play/original/93d42b2b-f373-4c2d-bca4-997412cb069d/Modi-Season-2-CM-TO-PM/Episode-1:Decision,-Not-Promises',
+        'info_dict': {
+            'id': '93d42b2b-f373-4c2d-bca4-997412cb069d',
+            'title': 'Episode 1:Decision, Not Promises',
+            'ext': 'mp4',
+            'description': 'md5:16f76058432a54774fbb2561a1955652',
+            'thumbnails': 'count:22',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 2040.0,
+            'language': 'hin',
+            'categories': ['Drama'],
+            'age_limit': 13,
+        },
+        'params': {'skip_download': 'm3u8'}
+    }, {
+        # Streams/Clips
+        'url': 'https://www.mzaalo.com/play/clip/83cdbcb5-400a-42f1-a1d2-459053cfbda5/Manto-Ki-Kahaaniya',
+        'info_dict': {
+            'id': '83cdbcb5-400a-42f1-a1d2-459053cfbda5',
+            'title': 'Manto Ki Kahaaniya',
+            'ext': 'mp4',
+            'description': 'md5:c3c5f1d05f0fd1bfcb05b673d1cc9f2f',
+            'thumbnails': 'count:3',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 1937.0,
+            'language': 'hin',
+        },
+        'params': {'skip_download': 'm3u8'}
+    }]
+
+    def _real_extract(self, url):
+        video_id, type_ = self._match_valid_url(url).group('id', 'type')
+        path = (f'partner/streamurl?&assetId={video_id}&getClipDetails=YES' if type_ == 'clip'
+                else f'api/v2/player/details?assetType={type_.upper()}&assetId={video_id}')
+        data = self._download_json(
+            f'https://production.mzaalo.com/platform/{path}', video_id, headers={
+                'Ocp-Apim-Subscription-Key': '1d0caac2702049b89a305929fdf4cbae',
+            })['data']
+
+        formats = self._extract_m3u8_formats(data['streamURL'], video_id)
+
+        subtitles = {}
+        for subs_lang, subs_url in traverse_obj(data, ('subtitles', {dict.items}, ...)):
+            if url_or_none(subs_url):
+                subtitles[subs_lang] = [{'url': subs_url, 'ext': 'vtt'}]
+
+        lang = traverse_obj(data, ('language', {str.lower}))
+        for f in formats:
+            f['language'] = lang
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(data, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'duration': ('duration', {parse_duration}),
+                'age_limit': ('maturity_rating', {parse_age_limit}),
+                'thumbnails': ('images', ..., {'url': {url_or_none}}),
+                'categories': ('genre', ..., {str}),
+            }),
+        }
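The extractor leans on `traverse_obj` dict templates to map the API payload into info-dict fields. A runnable sketch of just that mapping, assuming a recent yt-dlp (2023.03+) is importable; the payload below is invented but shaped like the fields the diff reads:

    from yt_dlp.utils import parse_age_limit, parse_duration, traverse_obj, url_or_none

    data = {  # invented payload mirroring the fields used above
        'title': 'Jamun',
        'duration': '01:32:07',    # parse_duration -> 5527.0, as in the test
        'maturity_rating': '13+',  # parse_age_limit -> 13
        'images': [{'url': 'https://example.com/a.jpg'}, {'url': 'https://example.com/b.jpg'}],
        'genre': ['Drama'],
    }
    print(traverse_obj(data, {
        'title': ('title', {str}),
        'duration': ('duration', {parse_duration}),
        'age_limit': ('maturity_rating', {parse_age_limit}),
        'thumbnails': ('images', ..., {'url': {url_or_none}}),
        'categories': ('genre', ..., {str}),
    }))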
@@ -21,7 +21,7 @@
 class NaverBaseIE(InfoExtractor):
     _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'

-    @staticmethod  # NB: Used in VLiveWebArchiveIE
+    @staticmethod  # NB: Used in VLiveWebArchiveIE, WeverseIE
     def process_subtitles(vod_data, process_url):
         ret = {'subtitles': {}, 'automatic_captions': {}}
         for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
217 yt_dlp/extractor/nekohacker.py Normal file
@@ -0,0 +1,217 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    extract_attributes,
+    get_element_by_class,
+    get_element_text_and_html_by_tag,
+    parse_duration,
+    traverse_obj,
+    try_call,
+    url_or_none,
+)
+
+
+class NekoHackerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nekohacker\.com/(?P<id>(?!free-dl)[\w-]+)'
+    _TESTS = [{
+        'url': 'https://nekohacker.com/nekoverse/',
+        'info_dict': {
+            'id': 'nekoverse',
+            'title': 'Nekoverse',
+        },
+        'playlist': [
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/01-Spaceship.mp3',
+                'md5': '44223701ebedba0467ebda4cc07fb3aa',
+                'info_dict': {
+                    'id': '1712',
+                    'ext': 'mp3',
+                    'title': 'Spaceship',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Spaceship',
+                    'track_number': 1,
+                    'duration': 195.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3',
+                'md5': '8f853c71719389d32bbbd3f1a87b3f08',
+                'info_dict': {
+                    'id': '1713',
+                    'ext': 'mp3',
+                    'title': 'City Runner',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'City Runner',
+                    'track_number': 2,
+                    'duration': 148.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3',
+                'md5': '5a8a8ae852720cee4c0ac95c7d1a7450',
+                'info_dict': {
+                    'id': '1714',
+                    'ext': 'mp3',
+                    'title': 'Nature Talk',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Nature Talk',
+                    'track_number': 3,
+                    'duration': 174.0
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3',
+                'md5': 'd8e59a48061764e50d92386a294abd50',
+                'info_dict': {
+                    'id': '1715',
+                    'ext': 'mp3',
+                    'title': 'Crystal World',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2022/11/Nekoverse_Artwork-1024x1024.jpg',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20221101',
+                    'album': 'Nekoverse',
+                    'artist': 'Neko Hacker',
+                    'track': 'Crystal World',
+                    'track_number': 4,
+                    'duration': 199.0
+                }
+            }
+        ]
+    }, {
+        'url': 'https://nekohacker.com/susume/',
+        'info_dict': {
+            'id': 'susume',
+            'title': '進め!むじなカンパニー',
+        },
+        'playlist': [
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-feat.-六科なじむ-CV_-日高里菜-割戶真友-CV_-金元寿子-軽井沢ユキ-CV_-上坂すみれ-出稼ぎガルシア-CV_-金子彩花-.mp3',
+                'md5': 'fb13f008aa81f26ba48f91fd2d6186ce',
+                'info_dict': {
+                    'id': '711',
+                    'ext': 'mp3',
+                    'title': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
+                    'track_number': 1,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3',
+                'md5': '028803f70241df512b7764e73396fdd1',
+                'info_dict': {
+                    'id': '709',
+                    'ext': 'mp3',
+                    'title': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
+                    'track_number': 2,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3',
+                'md5': 'adde9e9a16e1da5e602b579c247d0fb9',
+                'info_dict': {
+                    'id': '710',
+                    'ext': 'mp3',
+                    'title': '進め!むじなカンパニー (instrumental)',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': '進め!むじなカンパニー (instrumental)',
+                    'track_number': 3,
+                    'duration': None
+                }
+            },
+            {
+                'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3',
+                'md5': 'ebb0443039cf5f9ff7fd557ed9b23599',
+                'info_dict': {
+                    'id': '712',
+                    'ext': 'mp3',
+                    'title': 'むじな de なじむ (instrumental)',
+                    'thumbnail': 'https://nekohacker.com/wp-content/uploads/2021/01/OP表-1024x1024.png',
+                    'vcodec': 'none',
+                    'acodec': 'mp3',
+                    'release_date': '20210115',
+                    'album': '進め!むじなカンパニー',
+                    'artist': 'Neko Hacker',
+                    'track': 'むじな de なじむ (instrumental)',
+                    'track_number': 4,
+                    'duration': None
+                }
+            }
+        ]
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+        playlist = get_element_by_class('playlist', webpage)
+
+        if not playlist:
+            iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or ''
+            iframe_src = url_or_none(extract_attributes(iframe).get('src'))
+            if not iframe_src:
+                raise ExtractorError('No playlist or embed found in webpage')
+            elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
+                raise ExtractorError('Spotify embeds are not supported', expected=True)
+            return self.url_result(url, 'Generic')
+
+        entries = []
+        for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
+            entry = traverse_obj(extract_attributes(track), {
+                'url': ('data-audiopath', {url_or_none}),
+                'ext': ('data-audiopath', {determine_ext}),
+                'id': 'data-trackid',
+                'title': 'data-tracktitle',
+                'track': 'data-tracktitle',
+                'album': 'data-albumtitle',
+                'duration': ('data-tracktime', {parse_duration}),
+                'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
+                'thumbnail': ('data-albumart', {url_or_none}),
+            })
+            entries.append({
+                **entry,
+                'track_number': track_number,
+                'artist': 'Neko Hacker',
+                'vcodec': 'none',
+                'acodec': 'mp3' if entry['ext'] == 'mp3' else None,
+            })
+
+        return self.playlist_result(entries, playlist_id, traverse_obj(entries, (0, 'album')))
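Each track here is a `<li>` whose data attributes carry all of the metadata, so `extract_attributes` plus a `traverse_obj` template does most of the work. A standalone sketch with invented markup modelled on the attribute names above (requires yt-dlp on the import path):

    from yt_dlp.utils import extract_attributes, parse_duration, traverse_obj, url_or_none

    track = ('<li data-audiopath="https://nekohacker.com/x/01-Spaceship.mp3" '
             'data-trackid="1712" data-tracktitle="Spaceship" '
             'data-albumtitle="Nekoverse" data-tracktime="03:15">')  # invented markup
    print(traverse_obj(extract_attributes(track), {
        'url': ('data-audiopath', {url_or_none}),
        'id': 'data-trackid',
        'title': 'data-tracktitle',
        'album': 'data-albumtitle',
        'duration': ('data-tracktime', {parse_duration}),  # '03:15' -> 195.0
    }))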
@@ -67,7 +67,7 @@ def get_clean_field(key):
         info.update({
             '_type': 'url_transparent',
             'ie_key': 'Piksel',
-            'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id,
+            'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id,
             'id': vod_id,
         })
     else:
@@ -94,6 +94,19 @@ class NhkVodIE(NhkBaseIE):
     # Content available only for a limited period of time. Visit
     # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
     _TESTS = [{
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2061601/',
+        'info_dict': {
+            'id': 'yd8322ch',
+            'ext': 'mp4',
+            'description': 'md5:109c8b05d67a62d0592f2b445d2cd898',
+            'title': 'GRAND SUMO Highlights - [Recap] May Tournament Day 1 (Opening Day)',
+            'upload_date': '20230514',
+            'timestamp': 1684083791,
+            'series': 'GRAND SUMO Highlights',
+            'episode': '[Recap] May Tournament Day 1 (Opening Day)',
+            'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1684084443/4028649.jpg?w=1920&h=1080',
+        },
+    }, {
         # video clip
         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
         'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
@@ -104,6 +117,9 @@ class NhkVodIE(NhkBaseIE):
             'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
             'timestamp': 1565965194,
             'upload_date': '20190816',
+            'thumbnail': 'https://mz-edge.stream.co.jp/thumbs/aid/t1567086278/3715195.jpg?w=1920&h=1080',
+            'series': 'Dining with the Chef',
+            'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
         },
     }, {
         # audio clip
@@ -114,10 +130,7 @@ class NhkVodIE(NhkBaseIE):
             'title': "Japan's Top Inventions - Miniature Video Cameras",
             'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
+        'skip': '404 Not Found',
     }, {
         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
         'only_matching': True,
@@ -133,7 +146,6 @@ class NhkVodIE(NhkBaseIE):
     }, {
         # video, alphabetic character in ID #29670
         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
-        'only_matching': True,
         'info_dict': {
             'id': 'qfjay6cg',
             'ext': 'mp4',
@@ -142,7 +154,8 @@ class NhkVodIE(NhkBaseIE):
             'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
             'upload_date': '20210615',
             'timestamp': 1623722008,
-        }
+        },
+        'skip': '404 Not Found',
     }]

     def _real_extract(self, url):
@@ -153,12 +166,19 @@ class NhkVodProgramIE(NhkBaseIE):
     _VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
     _TESTS = [{
         # video program episodes
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo',
+        'info_dict': {
+            'id': 'sumo',
+            'title': 'GRAND SUMO Highlights',
+        },
+        'playlist_mincount': 12,
+    }, {
         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
         'info_dict': {
             'id': 'japanrailway',
             'title': 'Japan Railway Journal',
         },
-        'playlist_mincount': 1,
+        'playlist_mincount': 12,
     }, {
         # video program clips
         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
@@ -5,13 +5,17 @@
 import re
 import time

+from urllib.parse import urlparse
+
 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
     compat_HTTPError,
 )
+from ..dependencies import websockets
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,
+    WebSocketsWrapper,
     bug_reports_message,
     clean_html,
     float_or_none,
@@ -895,3 +899,162 @@ def _entries(self, list_id):
     def _real_extract(self, url):
         list_id = self._match_id(url)
         return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
+
+
+class NiconicoLiveIE(InfoExtractor):
+    IE_NAME = 'niconico:live'
+    IE_DESC = 'ニコニコ生放送'
+    _VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
+    _TESTS = [{
+        'note': 'this test case includes invisible characters for title, pasting them as-is',
+        'url': 'https://live.nicovideo.jp/watch/lv339533123',
+        'info_dict': {
+            'id': 'lv339533123',
+            'title': '激辛ペヤング食べます( ;ᯅ; )(歌枠オーディション参加中)',
+            'view_count': 1526,
+            'comment_count': 1772,
+            'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます',
+            'uploader': 'もか',
+            'channel': 'ゲストさんのコミュニティ',
+            'channel_id': 'co5776900',
+            'channel_url': 'https://com.nicovideo.jp/community/co5776900',
+            'timestamp': 1670677328,
+            'is_live': True,
+        },
+        'skip': 'livestream',
+    }, {
+        'url': 'https://live2.nicovideo.jp/watch/lv339533123',
+        'only_matching': True,
+    }, {
+        'url': 'https://sp.live.nicovideo.jp/watch/lv339533123',
+        'only_matching': True,
+    }, {
+        'url': 'https://sp.live2.nicovideo.jp/watch/lv339533123',
+        'only_matching': True,
+    }]
+
+    _KNOWN_LATENCY = ('high', 'low')
+
+    def _real_extract(self, url):
+        if not websockets:
+            raise ExtractorError('websockets library is not available. Please install it.', expected=True)
+        video_id = self._match_id(url)
+        webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
+
+        embedded_data = self._parse_json(unescapeHTML(self._search_regex(
+            r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
+
+        ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
+        if not ws_url:
+            raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
+        ws_url = update_url_query(ws_url, {
+            'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
+        })
+
+        hostname = remove_start(urlparse(urlh.geturl()).hostname, 'sp.')
+        cookies = try_get(urlh.geturl(), self._downloader._calc_cookies)
+        latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
+        if latency not in self._KNOWN_LATENCY:
+            latency = 'high'
+
+        ws = WebSocketsWrapper(ws_url, {
+            'Cookies': str_or_none(cookies) or '',
+            'Origin': f'https://{hostname}',
+            'Accept': '*/*',
+            'User-Agent': self.get_param('http_headers')['User-Agent'],
+        })
+
+        self.write_debug('[debug] Sending HLS server request')
+        ws.send(json.dumps({
+            'type': 'startWatching',
+            'data': {
+                'stream': {
+                    'quality': 'abr',
+                    'protocol': 'hls+fmp4',
+                    'latency': latency,
+                    'chasePlay': False
+                },
+                'room': {
+                    'protocol': 'webSocket',
+                    'commentable': True
+                },
+                'reconnect': False,
+            }
+        }))
+
+        while True:
+            recv = ws.recv()
+            if not recv:
+                continue
+            data = json.loads(recv)
+            if not isinstance(data, dict):
+                continue
+            if data.get('type') == 'stream':
+                m3u8_url = data['data']['uri']
+                qualities = data['data']['availableQualities']
+                break
+            elif data.get('type') == 'disconnect':
+                self.write_debug(recv)
+                raise ExtractorError('Disconnected at middle of extraction')
+            elif data.get('type') == 'error':
+                self.write_debug(recv)
+                message = traverse_obj(data, ('body', 'code')) or recv
+                raise ExtractorError(message)
+            elif self.get_param('verbose', False):
+                if len(recv) > 100:
+                    recv = recv[:100] + '...'
+                self.write_debug('Server said: %s' % recv)

+        title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
+            ('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
+
+        raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
+        thumbnails = []
+        for name, value in raw_thumbs.items():
+            if not isinstance(value, dict):
+                thumbnails.append({
+                    'id': name,
+                    'url': value,
+                    **parse_resolution(value, lenient=True),
+                })
+                continue
+
+            for k, img_url in value.items():
+                res = parse_resolution(k, lenient=True) or parse_resolution(img_url, lenient=True)
+                width, height = res.get('width'), res.get('height')
+
+                thumbnails.append({
+                    'id': f'{name}_{width}x{height}',
+                    'url': img_url,
+                    **res,
+                })
+
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
+        for fmt, q in zip(formats, reversed(qualities[1:])):
+            fmt.update({
+                'format_id': q,
+                'protocol': 'niconico_live',
+                'ws': ws,
+                'video_id': video_id,
+                'cookies': cookies,
+                'live_latency': latency,
+                'origin': hostname,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            **traverse_obj(embedded_data, {
+                'view_count': ('program', 'statistics', 'watchCount'),
+                'comment_count': ('program', 'statistics', 'commentCount'),
+                'uploader': ('program', 'supplier', 'name'),
+                'channel': ('socialGroup', 'name'),
+                'channel_id': ('socialGroup', 'id'),
+                'channel_url': ('socialGroup', 'socialGroupPageUrl'),
+            }),
+            'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))),
+            'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))),
+            'is_live': True,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
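For reference, the websocket handshake NiconicoLiveIE performs is small: the first message it sends is reproduced below, copied from the diff with the surrounding websocket plumbing omitted. Extraction then waits for a reply of type `stream` carrying the m3u8 URI and quality list.

    import json

    start_watching = {
        'type': 'startWatching',
        'data': {
            'stream': {'quality': 'abr', 'protocol': 'hls+fmp4',
                       'latency': 'high', 'chasePlay': False},  # latency: 'high' or 'low'
            'room': {'protocol': 'webSocket', 'commentable': True},
            'reconnect': False,
        },
    }
    print(json.dumps(start_watching))
    # Expected success reply shape:
    # {'type': 'stream', 'data': {'uri': '<m3u8 url>', 'availableQualities': [...]}}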
80 yt_dlp/extractor/owncloud.py Normal file
@@ -0,0 +1,80 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    url_or_none,
+    urlencode_postdata,
+)
+
+
+class OwnCloudIE(InfoExtractor):
+    _INSTANCES_RE = '|'.join((
+        r'(?:[^\.]+\.)?sciebo\.de',
+        r'cloud\.uni-koblenz-landau\.de',
+    ))
+    _VALID_URL = rf'https?://(?:{_INSTANCES_RE})/s/(?P<id>[\w.-]+)'
+
+    _TESTS = [
+        {
+            'url': 'https://ruhr-uni-bochum.sciebo.de/s/wWhqZzh9jTumVFN',
+            'info_dict': {
+                'id': 'wWhqZzh9jTumVFN',
+                'ext': 'mp4',
+                'title': 'CmvpJST.mp4',
+            },
+        },
+        {
+            'url': 'https://ruhr-uni-bochum.sciebo.de/s/WNDuFu0XuFtmm3f',
+            'info_dict': {
+                'id': 'WNDuFu0XuFtmm3f',
+                'ext': 'mp4',
+                'title': 'CmvpJST.mp4',
+            },
+            'params': {
+                'videopassword': '12345',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+
+        if re.search(r'<label[^>]+for="password"', webpage):
+            webpage = self._verify_video_password(webpage, urlh.geturl(), video_id)
+
+        hidden_inputs = self._hidden_inputs(webpage)
+        title = hidden_inputs.get('filename')
+        parsed_url = urllib.parse.urlparse(url)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url_or_none(hidden_inputs.get('downloadURL')) or parsed_url._replace(
+                path=urllib.parse.urljoin(parsed_url.path, 'download')).geturl(),
+            'ext': determine_ext(title),
+        }
+
+    def _verify_video_password(self, webpage, url, video_id):
+        password = self.get_param('videopassword')
+        if password is None:
+            raise ExtractorError(
+                'This video is protected by a password, use the --video-password option',
+                expected=True)
+
+        validation_response = self._download_webpage(
+            url, video_id, 'Validating Password', 'Wrong password?',
+            data=urlencode_postdata({
+                'requesttoken': self._hidden_inputs(webpage)['requesttoken'],
+                'password': password,
+            }))
+
+        if re.search(r'<label[^>]+for="password"', validation_response):
+            warning = self._search_regex(
+                r'<div[^>]+class="warning">([^<]*)</div>', validation_response,
+                'warning', default='The password is wrong')
+            raise ExtractorError(f'Opening the video failed, {self.IE_NAME} said: {warning!r}', expected=True)
+        return validation_response
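When the share page exposes no hidden `downloadURL` input, the extractor derives one by joining `download` onto the share path. A sketch of that fallback using the first test URL; note that `urljoin` replaces the last path segment unless the path ends in a slash:

    import urllib.parse

    share_url = 'https://ruhr-uni-bochum.sciebo.de/s/wWhqZzh9jTumVFN/'
    parsed = urllib.parse.urlparse(share_url)
    # Same _replace/urljoin dance as the return statement above.
    print(parsed._replace(
        path=urllib.parse.urljoin(parsed.path, 'download')).geturl())
    # -> https://ruhr-uni-bochum.sciebo.de/s/wWhqZzh9jTumVFN/download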
@@ -7,8 +7,10 @@
     int_or_none,
     join_nonempty,
     parse_iso8601,
+    traverse_obj,
     try_get,
     unescapeHTML,
+    urljoin,
 )
@@ -63,11 +65,11 @@ class PikselIE(InfoExtractor):
         }
     ]

-    def _call_api(self, app_token, resource, display_id, query, fatal=True):
-        response = (self._download_json(
-            'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
-            display_id, query=query, fatal=fatal) or {}).get('response')
-        failure = try_get(response, lambda x: x['failure']['reason'])
+    def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True):
+        url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5')
+        response = traverse_obj(
+            self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {}
+        failure = traverse_obj(response, ('failure', 'reason')) if response else 'Empty response from API'
         if failure:
             if fatal:
                 raise ExtractorError(failure, expected=True)
@@ -83,7 +85,7 @@ def _real_extract(self, url):
         ], webpage, 'app token')
         query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
         program = self._call_api(
-            app_token, 'program', display_id, query)['WsProgramResponse']['program']
+            app_token, 'program', display_id, query, url)['WsProgramResponse']['program']
         video_id = program['uuid']
         video_data = program['asset']
         title = video_data['title']
@@ -129,7 +131,7 @@ def process_asset_files(asset_files):
         process_asset_files(try_get(self._call_api(
             app_token, 'asset_file', display_id, {
                 'assetid': asset_id,
-            }, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
+            }, url, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))

         m3u8_url = dict_get(video_data, [
             'm3u8iPadURL',
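The point of the new `host` parameter is that `_call_api` now builds its endpoint relative to whichever page URL was given, instead of hard-coding player.piksel.com. A sketch of just the URL construction (the token and the alternate host below are made up):

    from yt_dlp.utils import urljoin

    def api_url(app_token, resource, host='https://player.piksel.com'):
        # Mirrors the url line in _call_api above; an absolute path keeps
        # only the scheme and host of whatever page URL is passed in.
        return urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5')

    print(api_url('some_app_token', 'program'))
    print(api_url('some_app_token', 'program',
                  host='https://player.example-whitelabel.com/v/abc123'))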
@@ -5,10 +5,16 @@


 class PlaySuisseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/watch/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
     _TESTS = [
         {
+            # Old URL
             'url': 'https://www.playsuisse.ch/watch/763211/0',
+            'only_matching': True,
+        },
+        {
+            # episode in a series
+            'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
             'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
             'info_dict': {
                 'id': '763211',
@@ -21,11 +27,11 @@ class PlaySuisseIE(InfoExtractor):
                 'season_number': 1,
                 'episode': 'Knochen',
                 'episode_number': 1,
-                'thumbnail': 'md5:9260abe0c0ec9b69914d0a10d54c5878'
+                'thumbnail': 're:https://playsuisse-img.akamaized.net/',
             }
-        },
-        {
-            'url': 'https://www.playsuisse.ch/watch/808675/0',
+        }, {
+            # film
+            'url': 'https://www.playsuisse.ch/watch/808675',
             'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
             'info_dict': {
                 'id': '808675',
@@ -33,26 +39,60 @@ class PlaySuisseIE(InfoExtractor):
                 'title': 'Der Läufer',
                 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
                 'duration': 5280,
-                'episode': 'Der Läufer',
-                'thumbnail': 'md5:44af7d65ee02bbba4576b131868bb783'
+                'thumbnail': 're:https://playsuisse-img.akamaized.net/',
             }
-        },
-        {
-            'url': 'https://www.playsuisse.ch/watch/817193/0',
-            'md5': '1d6c066f92cd7fffd8b28a53526d6b59',
+        }, {
+            # series (treated as a playlist)
+            'url': 'https://www.playsuisse.ch/detail/1115687',
             'info_dict': {
-                'id': '817193',
-                'ext': 'mp4',
-                'title': 'Die Einweihungsparty',
-                'description': 'md5:91ebf04d3a42cb3ab70666acf750a930',
-                'duration': 1380,
-                'series': 'Nr. 47',
-                'season': 'Season 1',
-                'season_number': 1,
-                'episode': 'Die Einweihungsparty',
-                'episode_number': 1,
-                'thumbnail': 'md5:637585fb106e3a4bcd991958924c7e44'
-            }
+                'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
+                'id': '1115687',
+                'series': 'They all came out to Montreux',
+                'title': 'They all came out to Montreux',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'description': 'md5:f2462744834b959a31adc6292380cda2',
+                    'duration': 3180,
+                    'episode': 'Folge 1',
+                    'episode_number': 1,
+                    'id': '1112663',
+                    'season': 'Season 1',
+                    'season_number': 1,
+                    'series': 'They all came out to Montreux',
+                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+                    'title': 'Folge 1',
+                    'ext': 'mp4'
+                },
+            }, {
+                'info_dict': {
+                    'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27',
+                    'duration': 2935,
+                    'episode': 'Folge 2',
+                    'episode_number': 2,
+                    'id': '1112661',
+                    'season': 'Season 1',
+                    'season_number': 1,
+                    'series': 'They all came out to Montreux',
+                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+                    'title': 'Folge 2',
+                    'ext': 'mp4'
+                },
+            }, {
+                'info_dict': {
+                    'description': 'md5:14a93a3356b2492a8f786ab2227ef602',
+                    'duration': 2994,
+                    'episode': 'Folge 3',
+                    'episode_number': 3,
+                    'id': '1112664',
+                    'season': 'Season 1',
+                    'season_number': 1,
+                    'series': 'They all came out to Montreux',
+                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
+                    'title': 'Folge 3',
+                    'ext': 'mp4'
+                }
+            }],
         }
     ]
@@ -142,6 +182,6 @@ def _extract_single(self, media_data):
         'subtitles': subtitles,
         'series': media_data.get('seriesName'),
         'season_number': int_or_none(media_data.get('seasonNumber')),
-        'episode': media_data.get('name'),
+        'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
         'episode_number': int_or_none(media_data.get('episodeNumber')),
     }
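The widened `_VALID_URL` now accepts the old `/watch/<id>/0` links, episode links where the ID lives in the `episodeId` query parameter, and `/detail/` series pages. A quick check of which ID each URL shape yields, using the pattern and test URLs from the diff:

    import re

    _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'

    for url in (
        'https://www.playsuisse.ch/watch/763182?episodeId=763211',  # episode -> 763211
        'https://www.playsuisse.ch/watch/808675',                   # film -> 808675
        'https://www.playsuisse.ch/detail/1115687',                 # series -> 1115687
    ):
        print(re.match(_VALID_URL, url).group('id'))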
@@ -2,26 +2,24 @@
 import json
 import math
 import re
+import urllib.parse

 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urllib_parse_unquote,
-    compat_urlparse
-)
+from ..compat import compat_str
 from ..utils import (
-    determine_ext,
-    extract_attributes,
     ExtractorError,
     InAdvancePagedList,
+    determine_ext,
+    extract_attributes,
     int_or_none,
     js_to_json,
     parse_iso8601,
     strip_or_none,
     traverse_obj,
-    unified_timestamp,
     unescapeHTML,
+    unified_timestamp,
     url_or_none,
+    urljoin,
 )
@@ -44,7 +42,7 @@ def _extract_webpage_player_entries(self, webpage, playlist_id, base_data):
                 'duration': int_or_none(media.get('length')),
                 'vcodec': 'none' if media.get('provider') == 'audio' else None,
             })
-            entry_title = compat_urllib_parse_unquote(media['desc'])
+            entry_title = urllib.parse.unquote(media['desc'])
             if entry_title:
                 entry['title'] = entry_title
             yield entry
@@ -130,10 +128,11 @@ def _real_extract(self, url):
         return self.playlist_result(entries, playlist_id, title, description)


-class PolskieRadioIE(InfoExtractor):
-    # new next.js sites, excluding radiokierowcow.pl
-    _VALID_URL = r'https?://(?:[^/]+\.)?polskieradio(?:24)?\.pl/artykul/(?P<id>\d+)'
+class PolskieRadioIE(PolskieRadioBaseExtractor):
+    # new next.js sites
+    _VALID_URL = r'https?://(?:[^/]+\.)?(?:polskieradio(?:24)?|radiokierowcow)\.pl/artykul/(?P<id>\d+)'
     _TESTS = [{
+        # articleData, attachments
         'url': 'https://jedynka.polskieradio.pl/artykul/1587943',
         'info_dict': {
             'id': '1587943',
@@ -148,6 +147,31 @@ class PolskieRadioIE(InfoExtractor):
             'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
         },
     }],
+    }, {
+        # post, legacy html players
+        'url': 'https://trojka.polskieradio.pl/artykul/2589163,Czy-wciaz-otrzymujemy-zdjecia-z-sond-Voyager',
+        'info_dict': {
+            'id': '2589163',
+            'title': 'Czy wciąż otrzymujemy zdjęcia z sond Voyager?',
+            'description': 'md5:cf1a7f348d63a2db9c0d7a63d1669473',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': '2577880',
+                'ext': 'mp3',
+                'title': 'md5:a57d10a0c02abd34dd675cb33707ad5a',
+                'duration': 321,
+            },
+        }],
+    }, {
+        # data, legacy
+        'url': 'https://radiokierowcow.pl/artykul/2694529',
+        'info_dict': {
+            'id': '2694529',
+            'title': 'Zielona fala reliktem przeszłości?',
+            'description': 'md5:f20a9a7ed9cb58916c54add94eae3bc0',
+        },
+        'playlist_count': 3,
     }, {
         'url': 'https://trojka.polskieradio.pl/artykul/1632955',
         'only_matching': True,
@@ -166,7 +190,8 @@ def _real_extract(self, url):
         webpage = self._download_webpage(url, playlist_id)

         article_data = traverse_obj(
-            self._search_nextjs_data(webpage, playlist_id), ('props', 'pageProps', 'data', 'articleData'))
+            self._search_nextjs_data(webpage, playlist_id), (
+                'props', 'pageProps', (('data', 'articleData'), 'post', 'data')), get_all=False)

         title = strip_or_none(article_data['title'])
@@ -178,7 +203,13 @@ def _real_extract(self, url):
             'id': self._search_regex(
                 r'([a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12})', entry['file'], 'entry id'),
             'title': strip_or_none(entry.get('description')) or title,
-        } for entry in article_data.get('attachments') or () if entry['fileType'] in ('Audio', )]
+        } for entry in article_data.get('attachments') or () if entry.get('fileType') in ('Audio', )]
+
+        if not entries:
+            # some legacy articles have no json attachments, but players in body
+            entries = self._extract_webpage_player_entries(article_data['content'], playlist_id, {
+                'title': title,
+            })

         return self.playlist_result(entries, playlist_id, title, description)
@@ -214,6 +245,15 @@ class PolskieRadioAuditionIE(InfoExtractor):
             'thumbnail': r're:https://static\.prsa\.pl/images/.+',
         },
         'playlist_mincount': 722,
+    }, {
+        # some articles were "promoted to main page" and thus link to old frontend
+        'url': 'https://trojka.polskieradio.pl/audycja/305',
+        'info_dict': {
+            'id': '305',
+            'title': 'Co w mowie piszczy?',
+            'thumbnail': r're:https://static\.prsa\.pl/images/.+',
+        },
+        'playlist_count': 1523,
     }]

     def _call_lp3(self, path, query, video_id, note):
@@ -254,7 +294,6 @@ def _entries(self, playlist_id, has_episodes, has_articles):
             for article in page['data']:
                 yield {
                     '_type': 'url_transparent',
-                    'ie_key': PolskieRadioIE.ie_key(),
                     'id': str(article['id']),
                     'url': article['url'],
                     'title': article.get('shortTitle'),
@@ -282,11 +321,8 @@ def _real_extract(self, url):
 class PolskieRadioCategoryIE(InfoExtractor):
     # legacy sites
     IE_NAME = 'polskieradio:category'
-    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/(?:\d+(?:,[^/]+)?/|[^/]+/Tag)(?P<id>\d+)'
     _TESTS = [{
-        'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source',
-        'only_matching': True
-    }, {
         'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow',
         'info_dict': {
             'id': '4143',
@@ -300,6 +336,36 @@ class PolskieRadioCategoryIE(InfoExtractor):
             'title': 'Muzyka',
         },
         'playlist_mincount': 61
+    }, {
+        # billennium tabs
+        'url': 'https://www.polskieradio.pl/8/2385',
+        'info_dict': {
+            'id': '2385',
+            'title': 'Droga przez mąkę',
+        },
+        'playlist_mincount': 111,
+    }, {
+        'url': 'https://www.polskieradio.pl/10/4930',
+        'info_dict': {
+            'id': '4930',
+            'title': 'Teraz K-pop!',
+        },
+        'playlist_mincount': 392,
+    }, {
+        # post back pages, audio content directly without articles
+        'url': 'https://www.polskieradio.pl/8,dwojka/7376,nowa-mowa',
+        'info_dict': {
+            'id': '7376',
+            'title': 'Nowa mowa',
+        },
+        'playlist_mincount': 244,
+    }, {
+        'url': 'https://www.polskieradio.pl/Krzysztof-Dziuba/Tag175458',
+        'info_dict': {
+            'id': '175458',
+            'title': 'Krzysztof Dziuba',
+        },
+        'playlist_mincount': 420,
     }, {
         'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka',
         'only_matching': True,
@@ -311,25 +377,61 @@ def suitable(cls, url):

     def _entries(self, url, page, category_id):
         content = page
+        is_billennium_tabs = 'onclick="TB_LoadTab(' in page
+        is_post_back = 'onclick="__doPostBack(' in page
+        pagination = page if is_billennium_tabs else None
         for page_num in itertools.count(2):
             for a_entry, entry_id in re.findall(
-                    r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>',
+                    r'(?s)<article[^>]+>.*?(<a[^>]+href=["\'](?:(?:https?)?://[^/]+)?/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>',
                    content):
                 entry = extract_attributes(a_entry)
-                href = entry.get('href')
-                if not href:
-                    continue
-                yield self.url_result(
-                    compat_urlparse.urljoin(url, href), PolskieRadioLegacyIE,
-                    entry_id, entry.get('title'))
-            mobj = re.search(
-                r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
-                content)
-            if not mobj:
-                break
-            next_url = compat_urlparse.urljoin(url, mobj.group('url'))
-            content = self._download_webpage(
-                next_url, category_id, 'Downloading page %s' % page_num)
+                if entry.get('href'):
+                    yield self.url_result(
+                        urljoin(url, entry['href']), PolskieRadioLegacyIE, entry_id, entry.get('title'))
+            for a_entry in re.findall(r'<span data-media=({[^ ]+})', content):
+                yield traverse_obj(self._parse_json(a_entry, category_id), {
+                    'url': 'file',
+                    'id': 'uid',
+                    'duration': 'length',
+                    'title': ('title', {urllib.parse.unquote}),
+                    'description': ('desc', {urllib.parse.unquote}),
+                })
+            if is_billennium_tabs:
+                params = self._search_json(
+                    r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+onclick=["\']TB_LoadTab\(',
+                    pagination, 'next page params', category_id, default=None, close_objects=1,
+                    contains_pattern='.+', transform_source=lambda x: '[%s' % js_to_json(unescapeHTML(x)))
+                if not params:
+                    break
+                tab_content = self._download_json(
+                    'https://www.polskieradio.pl/CMS/TemplateBoxesManagement/TemplateBoxTabContent.aspx/GetTabContent',
+                    category_id, f'Downloading page {page_num}', headers={'content-type': 'application/json'},
+                    data=json.dumps(dict(zip((
+                        'boxInstanceId', 'tabId', 'categoryType', 'sectionId', 'categoryId', 'pagerMode',
+                        'subjectIds', 'tagIndexId', 'queryString', 'name', 'openArticlesInParentTemplate',
+                        'idSectionFromUrl', 'maxDocumentAge', 'showCategoryForArticle', 'pageNumber'
+                    ), params))).encode())['d']
+                content, pagination = tab_content['Content'], tab_content.get('PagerContent')
+            elif is_post_back:
+                target = self._search_regex(
+                    r'onclick=(?:["\'])__doPostBack\((?P<q1>["\'])(?P<target>[\w$]+)(?P=q1)\s*,\s*(?P<q2>["\'])Next(?P=q2)',
|
||||||
|
content, 'pagination postback target', group='target', default=None)
|
||||||
|
if not target:
|
||||||
|
break
|
||||||
|
content = self._download_webpage(
|
||||||
|
url, category_id, f'Downloading page {page_num}',
|
||||||
|
data=urllib.parse.urlencode({
|
||||||
|
**self._hidden_inputs(content),
|
||||||
|
'__EVENTTARGET': target,
|
||||||
|
'__EVENTARGUMENT': 'Next',
|
||||||
|
}).encode())
|
||||||
|
else:
|
||||||
|
next_url = urljoin(url, self._search_regex(
|
||||||
|
r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
content, 'next page url', group='url', default=None))
|
||||||
|
if not next_url:
|
||||||
|
break
|
||||||
|
content = self._download_webpage(next_url, category_id, f'Downloading page {page_num}')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
category_id = self._match_id(url)
|
category_id = self._match_id(url)
|
||||||
|
@ -337,7 +439,7 @@ def _real_extract(self, url):
|
||||||
if PolskieRadioAuditionIE.suitable(urlh.url):
|
if PolskieRadioAuditionIE.suitable(urlh.url):
|
||||||
return self.url_result(urlh.url, PolskieRadioAuditionIE, category_id)
|
return self.url_result(urlh.url, PolskieRadioAuditionIE, category_id)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>([^<]+) - [^<]+ - [^<]+</title>',
|
r'<title>([^<]+)(?: - [^<]+ - [^<]+| w [Pp]olskie[Rr]adio\.pl\s*)</title>',
|
||||||
webpage, 'title', fatal=False)
|
webpage, 'title', fatal=False)
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(url, webpage, category_id),
|
self._entries(url, webpage, category_id),
|
||||||
|
@ -506,39 +608,3 @@ def _real_extract(self, url):
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
})
|
})
|
||||||
return self._parse_episode(data[0])
|
return self._parse_episode(data[0])
|
||||||
|
|
||||||
|
|
||||||
class PolskieRadioRadioKierowcowIE(PolskieRadioBaseExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?radiokierowcow\.pl/artykul/(?P<id>[0-9]+)'
|
|
||||||
IE_NAME = 'polskieradio:kierowcow'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://radiokierowcow.pl/artykul/2694529',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2694529',
|
|
||||||
'title': 'Zielona fala reliktem przeszłości?',
|
|
||||||
'description': 'md5:343950a8717c9818fdfd4bd2b8ca9ff2',
|
|
||||||
},
|
|
||||||
'playlist_count': 3,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
media_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, media_id)
|
|
||||||
nextjs_build = self._search_nextjs_data(webpage, media_id)['buildId']
|
|
||||||
article = self._download_json(
|
|
||||||
f'https://radiokierowcow.pl/_next/data/{nextjs_build}/artykul/{media_id}.json?articleId={media_id}',
|
|
||||||
media_id)
|
|
||||||
data = article['pageProps']['data']
|
|
||||||
title = data['title']
|
|
||||||
entries = self._extract_webpage_player_entries(data['content'], media_id, {
|
|
||||||
'title': title,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': media_id,
|
|
||||||
'entries': entries,
|
|
||||||
'title': title,
|
|
||||||
'description': data.get('lead'),
|
|
||||||
}
|
|
||||||
|
|
|
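
For context on the post-back pagination the reworked _entries handles: legacy ASP.NET WebForms pages advance to the next page by re-submitting the form's hidden fields together with the event that the "Next" link would raise through __doPostBack(). A minimal standalone sketch of that request shape, outside the extractor (the URL and target name are illustrative placeholders, not values from this commit):

import urllib.parse
import urllib.request

def fetch_next_page(page_url, hidden_inputs, target):
    # WebForms pagination: re-POST the page's hidden state fields plus the
    # event that the "Next" link would raise via __doPostBack(target, 'Next')
    data = urllib.parse.urlencode({
        **hidden_inputs,            # e.g. __VIEWSTATE, __EVENTVALIDATION
        '__EVENTTARGET': target,    # server control that handles the event
        '__EVENTARGUMENT': 'Next',
    }).encode()
    with urllib.request.urlopen(urllib.request.Request(page_url, data=data)) as resp:
        return resp.read().decode()

The extractor above does the same thing through self._hidden_inputs() and self._download_webpage(..., data=...).
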
yt_dlp/extractor/rai.py
@@ -1,19 +1,12 @@
 import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
 from ..utils import (
     clean_html,
     determine_ext,
     ExtractorError,
     filter_dict,
-    find_xpath_attr,
-    fix_xml_ampersands,
     GeoRestrictedError,
-    HEADRequest,
     int_or_none,
     join_nonempty,
     parse_duration,
@@ -35,82 +28,70 @@ class RaiBaseIE(InfoExtractor):
     _GEO_BYPASS = False

     def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
+        def fix_cdata(s):
+            # remove \r\n\t before and after <![CDATA[ ]]> to avoid
+            # polluted text with xpath_text
+            s = re.sub(r'(\]\]>)[\r\n\t]+(</)', '\\1\\2', s)
+            return re.sub(r'(>)[\r\n\t]+(<!\[CDATA\[)', '\\1\\2', s)
+
         if not re.match(r'https?://', relinker_url):
             return {'formats': [{'url': relinker_url}]}

+        # set User-Agent to generic 'Rai' to avoid quality filtering from
+        # the media server and get the maximum qualities available
+        relinker = self._download_xml(
+            relinker_url, video_id, note='Downloading XML metadata',
+            transform_source=fix_cdata, query={'output': 64},
+            headers={**self.geo_verification_headers(), 'User-Agent': 'Rai'})
+
+        if xpath_text(relinker, './license_url', default='{}') != '{}':
+            self.report_drm(video_id)
+
+        is_live = xpath_text(relinker, './is_live', default='N') == 'Y'
+        duration = parse_duration(xpath_text(relinker, './duration', default=None))
+        media_url = xpath_text(relinker, './url[@type="content"]', default=None)
+
+        if not media_url:
+            self.raise_no_formats('The relinker returned no media url')
+
+        # geo flag is a bit unreliable and not properly set all the time
+        geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y'
+
+        ext = determine_ext(media_url)
         formats = []
-        geoprotection = None
-        is_live = None
-        duration = None

-        for platform in ('mon', 'flash', 'native'):
-            relinker = self._download_xml(
-                relinker_url, video_id,
-                note=f'Downloading XML metadata for platform {platform}',
-                transform_source=fix_xml_ampersands,
-                query={'output': 45, 'pl': platform},
-                headers=self.geo_verification_headers())
-
-            if xpath_text(relinker, './license_url', default='{}') != '{}':
-                self.report_drm(video_id)
-
-            if not geoprotection:
-                geoprotection = xpath_text(
-                    relinker, './geoprotection', default=None) == 'Y'
-
-            if not is_live:
-                is_live = xpath_text(
-                    relinker, './is_live', default=None) == 'Y'
-            if not duration:
-                duration = parse_duration(xpath_text(
-                    relinker, './duration', default=None))
-
-            url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
-            if url_elem is None:
-                continue
-
-            media_url = url_elem.text
-
-            # This does not imply geo restriction (e.g.
-            # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
-            if '/video_no_available.mp4' in media_url:
-                continue
-
-            ext = determine_ext(media_url)
-            if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
-                continue
-
-            if ext == 'mp3':
-                formats.append({
-                    'url': media_url,
-                    'vcodec': 'none',
-                    'acodec': 'mp3',
-                    'format_id': 'http-mp3',
-                })
-                break
-            elif ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
-                formats.extend(self._extract_m3u8_formats(
-                    media_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-            elif ext == 'f4m' or platform == 'flash':
-                manifest_url = update_url_query(
-                    media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
-                    {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
-                formats.extend(self._extract_f4m_formats(
-                    manifest_url, video_id, f4m_id='hds', fatal=False))
-            else:
-                bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
-                formats.append({
-                    'url': media_url,
-                    'tbr': bitrate if bitrate > 0 else None,
-                    'format_id': f'http-{bitrate if bitrate > 0 else "http"}',
-                })
-
-        if not formats and geoprotection is True:
+        if ext == 'mp3':
+            formats.append({
+                'url': media_url,
+                'vcodec': 'none',
+                'acodec': 'mp3',
+                'format_id': 'https-mp3',
+            })
+        elif ext == 'm3u8' or 'format=m3u8' in media_url:
+            formats.extend(self._extract_m3u8_formats(
+                media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+        elif ext == 'f4m':
+            # very likely no longer needed. Cannot find any url that uses it.
+            manifest_url = update_url_query(
+                media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
+                {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
+            formats.extend(self._extract_f4m_formats(
+                manifest_url, video_id, f4m_id='hds', fatal=False))
+        elif ext == 'mp4':
+            bitrate = int_or_none(xpath_text(relinker, './bitrate'))
+            formats.append({
+                'url': media_url,
+                'tbr': bitrate if bitrate > 0 else None,
+                'format_id': join_nonempty('https', bitrate, delim='-'),
+            })
+        else:
+            raise ExtractorError('Unrecognized media file found')
+
+        if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url:
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)

-        if not audio_only:
-            formats.extend(self._create_http_urls(relinker_url, formats))
+        if not audio_only and not is_live:
+            formats.extend(self._create_http_urls(media_url, relinker_url, formats))

         return filter_dict({
             'is_live': is_live,
@@ -118,38 +99,31 @@ def _extract_relinker_info(self, relinker_url, video_id, audio_only=False):
             'formats': formats,
         })

-    def _create_http_urls(self, relinker_url, fmts):
-        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
+    def _create_http_urls(self, manifest_url, relinker_url, fmts):
+        _MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
         _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
         _QUALITY = {
             # tbr: w, h
-            '250': [352, 198],
-            '400': [512, 288],
-            '700': [512, 288],
-            '800': [700, 394],
-            '1200': [736, 414],
-            '1800': [1024, 576],
-            '2400': [1280, 720],
-            '3200': [1440, 810],
-            '3600': [1440, 810],
-            '5000': [1920, 1080],
-            '10000': [1920, 1080],
+            250: [352, 198],
+            400: [512, 288],
+            600: [512, 288],
+            700: [512, 288],
+            800: [700, 394],
+            1200: [736, 414],
+            1500: [920, 518],
+            1800: [1024, 576],
+            2400: [1280, 720],
+            3200: [1440, 810],
+            3600: [1440, 810],
+            5000: [1920, 1080],
+            10000: [1920, 1080],
         }

-        def test_url(url):
-            resp = self._request_webpage(
-                HEADRequest(url), None, headers={'User-Agent': 'Rai'},
-                fatal=False, errnote=False, note=False)
-
-            if resp is False:
+        def percentage(number, target, pc=20, roof=125):
+            '''check if the target is in the range of number +/- percent'''
+            if not number or number < 0:
                 return False
-
-            if resp.code == 200:
-                return False if resp.url == url else resp.url
-            return None
-
-        # filter out audio-only formats
-        fmts = [f for f in fmts if not f.get('vcodec') == 'none']
+            return abs(target - number) < min(float(number) * float(pc) / 100.0, roof)

         def get_format_info(tbr):
             import math
@@ -157,67 +131,78 @@ def get_format_info(tbr):
             if len(fmts) == 1 and not br:
                 br = fmts[0].get('tbr')
             if br and br > 300:
-                tbr = compat_str(math.floor(br / 100) * 100)
+                tbr = math.floor(br / 100) * 100
             else:
-                tbr = '250'
+                tbr = 250

             # try extracting info from available m3u8 formats
-            format_copy = None
+            format_copy = [None, None]
             for f in fmts:
                 if f.get('tbr'):
-                    br_limit = math.floor(br / 100)
-                    if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1:
-                        format_copy = f.copy()
+                    if percentage(tbr, f['tbr']):
+                        format_copy[0] = f.copy()
+                if [f.get('width'), f.get('height')] == _QUALITY.get(tbr):
+                    format_copy[1] = f.copy()
+                    format_copy[1]['tbr'] = tbr
+
+            # prefer format with similar bitrate because there might be
+            # multiple video with the same resolution but different bitrate
+            format_copy = format_copy[0] or format_copy[1] or {}
             return {
+                'format_id': f'https-{tbr}',
                 'width': format_copy.get('width'),
                 'height': format_copy.get('height'),
                 'tbr': format_copy.get('tbr'),
                 'vcodec': format_copy.get('vcodec'),
                 'acodec': format_copy.get('acodec'),
                 'fps': format_copy.get('fps'),
-                'format_id': f'https-{tbr}',
             } if format_copy else {
+                'format_id': f'https-{tbr}',
                 'width': _QUALITY[tbr][0],
                 'height': _QUALITY[tbr][1],
-                'format_id': f'https-{tbr}',
-                'tbr': int(tbr),
+                'tbr': tbr,
+                'vcodec': 'avc1',
+                'acodec': 'mp4a',
+                'fps': 25,
             }

-        loc = test_url(_MP4_TMPL % (relinker_url, '*'))
-        if not isinstance(loc, compat_str):
-            return []
+        # filter out single-stream formats
+        fmts = [f for f in fmts
+                if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none']

-        mobj = re.match(
-            _RELINKER_REG,
-            test_url(relinker_url) or '')
+        mobj = re.search(_MANIFEST_REG, manifest_url)
         if not mobj:
             return []

         available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
-        available_qualities = [i for i in available_qualities if i]

         formats = []
-        for q in available_qualities:
-            fmt = {
+        for q in filter(None, available_qualities):
+            self.write_debug(f'Creating https format for quality {q}')
+            formats.append({
                 'url': _MP4_TMPL % (relinker_url, q),
                 'protocol': 'https',
                 'ext': 'mp4',
                 **get_format_info(q)
-            }
-            formats.append(fmt)
+            })
         return formats

+    @staticmethod
+    def _get_thumbnails_list(thumbs, url):
+        return [{
+            'url': urljoin(url, thumb_url),
+        } for thumb_url in (thumbs or {}).values() if thumb_url]
+
     @staticmethod
     def _extract_subtitles(url, video_data):
         STL_EXT = 'stl'
         SRT_EXT = 'srt'
         subtitles = {}
-        subtitles_array = video_data.get('subtitlesArray') or []
+        subtitles_array = video_data.get('subtitlesArray') or video_data.get('subtitleList') or []
         for k in ('subtitles', 'subtitlesUrl'):
             subtitles_array.append({'url': video_data.get(k)})
         for subtitle in subtitles_array:
             sub_url = subtitle.get('url')
-            if sub_url and isinstance(sub_url, compat_str):
+            if sub_url and isinstance(sub_url, str):
                 sub_lang = subtitle.get('language') or 'it'
                 sub_url = urljoin(url, sub_url)
                 sub_ext = determine_ext(sub_url, SRT_EXT)
@@ -236,7 +221,7 @@ def _extract_subtitles(url, video_data):
 class RaiPlayIE(RaiBaseIE):
     _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)'
     _TESTS = [{
-        'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
+        'url': 'https://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
         'md5': '8970abf8caf8aef4696e7b1f2adfc696',
         'info_dict': {
             'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
@@ -244,22 +229,20 @@ class RaiPlayIE(RaiBaseIE):
             'title': 'Report del 07/04/2014',
             'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014',
             'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'Rai Gulp',
+            'thumbnail': r're:^https?://www\.raiplay\.it/.+\.jpg',
+            'uploader': 'Rai 3',
+            'creator': 'Rai 3',
             'duration': 6160,
             'series': 'Report',
             'season': '2013/14',
-            'subtitles': {
-                'it': 'count:4',
-            },
+            'subtitles': {'it': 'count:4'},
             'release_year': 2022,
             'episode': 'Espresso nel caffè - 07/04/2014',
             'timestamp': 1396919880,
             'upload_date': '20140408',
+            'formats': 'count:4',
         },
-        'params': {
-            'skip_download': True,
-        },
+        'params': {'skip_download': True},
     }, {
         # 1080p direct mp4 url
         'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
@@ -270,8 +253,9 @@ class RaiPlayIE(RaiBaseIE):
             'title': 'Blanca - S1E1 - Senza occhi',
             'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi',
             'description': 'md5:75f95d5c030ec8bac263b1212322e28c',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'Rai 1',
+            'thumbnail': r're:^https://www\.raiplay\.it/dl/img/.+\.jpg',
+            'uploader': 'Rai Premium',
+            'creator': 'Rai Fiction',
             'duration': 6493,
             'series': 'Blanca',
             'season': 'Season 1',
@@ -281,6 +265,30 @@ class RaiPlayIE(RaiBaseIE):
             'episode': 'Senza occhi',
             'timestamp': 1637318940,
             'upload_date': '20211119',
+            'formats': 'count:12',
+        },
+        'params': {'skip_download': True},
+        'expected_warnings': ['Video not available. Likely due to geo-restriction.']
+    }, {
+        # 1500 quality
+        'url': 'https://www.raiplay.it/video/2012/09/S1E11---Tutto-cio-che-luccica-0cab3323-732e-45d6-8e86-7704acab6598.html',
+        'md5': 'a634d20e8ab2d43724c273563f6bf87a',
+        'info_dict': {
+            'id': '0cab3323-732e-45d6-8e86-7704acab6598',
+            'ext': 'mp4',
+            'title': 'Mia and Me - S1E11 - Tutto ciò che luccica',
+            'alt_title': 'St 1 Ep 11 - Mia and Me - Tutto ciò che luccica',
+            'description': 'md5:4969e594184b1920c4c1f2b704da9dea',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Rai Gulp',
+            'series': 'Mia and Me',
+            'season': 'Season 1',
+            'episode_number': 11,
+            'release_year': 2015,
+            'season_number': 1,
+            'episode': 'Tutto ciò che luccica',
+            'timestamp': 1348495020,
+            'upload_date': '20120924',
         },
     }, {
         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
@@ -299,57 +307,40 @@ def _real_extract(self, url):
         base, video_id = self._match_valid_url(url).groups()

         media = self._download_json(
-            base + '.json', video_id, 'Downloading video JSON')
+            f'{base}.json', video_id, 'Downloading video JSON')

         if not self.get_param('allow_unplayable_formats'):
-            if try_get(
-                    media,
-                    (lambda x: x['rights_management']['rights']['drm'],
-                     lambda x: x['program_info']['rights_management']['rights']['drm']),
-                    dict):
+            if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')):
                 self.report_drm(video_id)

-        title = media['name']
         video = media['video']

         relinker_info = self._extract_relinker_info(video['content_url'], video_id)
-
-        thumbnails = []
-        for _, value in media.get('images', {}).items():
-            if value:
-                thumbnails.append({
-                    'url': urljoin(url, value),
-                })
-
-        date_published = media.get('date_published')
-        time_published = media.get('time_published')
-        if date_published and time_published:
-            date_published += ' ' + time_published
-
-        subtitles = self._extract_subtitles(url, video)
-
-        program_info = media.get('program_info') or {}
+        date_published = join_nonempty(
+            media.get('date_published'), media.get('time_published'), delim=' ')
         season = media.get('season')

         alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ')

         return {
             'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
             'display_id': video_id,
-            'title': title,
+            'title': media.get('name'),
             'alt_title': strip_or_none(alt_title or None),
             'description': media.get('description'),
-            'uploader': strip_or_none(media.get('channel') or None),
-            'creator': strip_or_none(media.get('editor') or None),
+            'uploader': strip_or_none(
+                traverse_obj(media, ('program_info', 'channel'))
+                or media.get('channel') or None),
+            'creator': strip_or_none(
+                traverse_obj(media, ('program_info', 'editor'))
+                or media.get('editor') or None),
             'duration': parse_duration(video.get('duration')),
             'timestamp': unified_timestamp(date_published),
-            'thumbnails': thumbnails,
-            'series': program_info.get('name'),
+            'thumbnails': self._get_thumbnails_list(media.get('images'), url),
+            'series': traverse_obj(media, ('program_info', 'name')),
             'season_number': int_or_none(season),
             'season': season if (season and not season.isdigit()) else None,
             'episode': media.get('episode_title'),
             'episode_number': int_or_none(media.get('episode')),
-            'subtitles': subtitles,
+            'subtitles': self._extract_subtitles(url, video),
             'release_year': int_or_none(traverse_obj(media, ('track_info', 'edit_year'))),
             **relinker_info
         }
@@ -371,38 +362,39 @@ class RaiPlayLiveIE(RaiPlayIE):  # XXX: Do not subclass from concrete IE
             'live_status': 'is_live',
             'upload_date': '20090502',
             'timestamp': 1241276220,
+            'formats': 'count:3',
         },
-        'params': {
-            'skip_download': True,
-        },
+        'params': {'skip_download': True},
     }]


 class RaiPlayPlaylistIE(InfoExtractor):
     _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
     _TESTS = [{
+        # entire series episodes + extras...
         'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/',
         'info_dict': {
             'id': 'nondirloalmiocapo',
             'title': 'Non dirlo al mio capo',
             'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
         },
-        'playlist_mincount': 12,
+        'playlist_mincount': 30,
     }, {
+        # single season
         'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/',
         'info_dict': {
             'id': 'nondirloalmiocapo',
             'title': 'Non dirlo al mio capo - Stagione 2',
             'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
         },
-        'playlist_mincount': 12,
+        'playlist_count': 12,
     }]

     def _real_extract(self, url):
         base, playlist_id, extra_id = self._match_valid_url(url).groups()

         program = self._download_json(
-            base + '.json', playlist_id, 'Downloading program JSON')
+            f'{base}.json', playlist_id, 'Downloading program JSON')

         if extra_id:
             extra_id = extra_id.upper().rstrip('/')
@@ -450,7 +442,7 @@ class RaiPlaySoundIE(RaiBaseIE):
             'title': 'Il Ruggito del Coniglio del 10/12/2021',
             'alt_title': 'md5:0e6476cd57858bb0f3fcc835d305b455',
             'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.+\.jpg$',
             'uploader': 'rai radio 2',
             'duration': 5685,
             'series': 'Il Ruggito del Coniglio',
@@ -459,9 +451,7 @@ class RaiPlaySoundIE(RaiBaseIE):
             'timestamp': 1638346620,
             'upload_date': '20211201',
         },
-        'params': {
-            'skip_download': True,
-        },
+        'params': {'skip_download': True},
     }]

     def _real_extract(self, url):
@@ -480,9 +470,6 @@ def _real_extract(self, url):
             lambda x: x['live']['create_date']))

         podcast_info = traverse_obj(media, 'podcast_info', ('live', 'cards', 0)) or {}
-        thumbnails = [{
-            'url': urljoin(url, thumb_url),
-        } for thumb_url in (podcast_info.get('images') or {}).values() if thumb_url]

         return {
             **info,
@@ -494,7 +481,7 @@ def _real_extract(self, url):
             'uploader': traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none),
             'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none),
             'timestamp': unified_timestamp(date_published),
-            'thumbnails': thumbnails,
+            'thumbnails': self._get_thumbnails_list(podcast_info.get('images'), url),
             'series': podcast_info.get('title'),
             'season_number': int_or_none(media.get('season')),
             'episode': media.get('episode_title'),
@@ -512,30 +499,30 @@ class RaiPlaySoundLiveIE(RaiPlaySoundIE):  # XXX: Do not subclass from concrete
             'display_id': 'radio2',
             'ext': 'mp4',
             'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+',
-            'thumbnail': r're:https://www.raiplaysound.it/dl/img/.+?png',
+            'thumbnail': r're:^https://www\.raiplaysound\.it/dl/img/.+\.png',
             'uploader': 'rai radio 2',
             'series': 'Rai Radio 2',
             'creator': 'raiplaysound',
             'is_live': True,
             'live_status': 'is_live',
         },
-        'params': {
-            'skip_download': 'live',
-        },
+        'params': {'skip_download': True},
     }]


 class RaiPlaySoundPlaylistIE(InfoExtractor):
     _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?'
     _TESTS = [{
+        # entire show
         'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio',
         'info_dict': {
             'id': 'ilruggitodelconiglio',
             'title': 'Il Ruggito del Coniglio',
-            'description': 'md5:1bbaf631245a7ab1ec4d9fbb3c7aa8f3',
+            'description': 'md5:48cff6972435964284614d70474132e6',
         },
         'playlist_mincount': 65,
     }, {
+        # single season
         'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995',
         'info_dict': {
             'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995',
@@ -568,22 +555,19 @@ def _real_extract(self, url):
 class RaiIE(RaiBaseIE):
     _VALID_URL = rf'https?://[^/]+\.(?:rai\.(?:it|tv))/.+?-(?P<id>{RaiBaseIE._UUID_RE})(?:-.+?)?\.html'
     _TESTS = [{
-        # var uniquename = "ContentItem-..."
-        # data-id="ContentItem-..."
         'url': 'https://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
         'info_dict': {
             'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
             'ext': 'mp4',
             'title': 'TG PRIMO TEMPO',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg',
             'duration': 1758,
             'upload_date': '20140612',
         },
-        'skip': 'This content is available only in Italy',
+        'params': {'skip_download': True},
+        'expected_warnings': ['Video not available. Likely due to geo-restriction.']
     }, {
-        # with ContentItem in og:url
         'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
-        'md5': '06345bd97c932f19ffb129973d07a020',
         'info_dict': {
             'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
             'ext': 'mp4',
@@ -592,123 +576,51 @@ class RaiIE(RaiBaseIE):
             'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 2214,
             'upload_date': '20161103'
-        }
+        },
+        'params': {'skip_download': True},
     }, {
-        # Direct MMS URL
+        # Direct MMS: Media URL no longer works.
        'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
         'only_matching': True,
     }]

-    def _extract_from_content_id(self, content_id, url):
+    def _real_extract(self, url):
+        content_id = self._match_id(url)
         media = self._download_json(
             f'https://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-{content_id}.html?json',
-            content_id, 'Downloading video JSON')
+            content_id, 'Downloading video JSON', fatal=False, expected_status=404)

-        title = media['name'].strip()
+        if media is None:
+            return None

-        media_type = media['type']
-        if 'Audio' in media_type:
+        if 'Audio' in media['type']:
             relinker_info = {
                 'formats': [{
-                    'format_id': media.get('formatoAudio'),
+                    'format_id': join_nonempty('https', media.get('formatoAudio'), delim='-'),
                     'url': media['audioUrl'],
                     'ext': media.get('formatoAudio'),
+                    'vcodec': 'none',
+                    'acodec': media.get('formatoAudio'),
                 }]
             }
-        elif 'Video' in media_type:
+        elif 'Video' in media['type']:
             relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
         else:
             raise ExtractorError('not a media file')

-        thumbnails = []
-        for image_type in ('image', 'image_medium', 'image_300'):
-            thumbnail_url = media.get(image_type)
-            if thumbnail_url:
-                thumbnails.append({
-                    'url': compat_urlparse.urljoin(url, thumbnail_url),
-                })
-
-        subtitles = self._extract_subtitles(url, media)
+        thumbnails = self._get_thumbnails_list(
+            {image_type: media.get(image_type) for image_type in (
+                'image', 'image_medium', 'image_300')}, url)

         return {
             'id': content_id,
-            'title': title,
-            'description': strip_or_none(media.get('desc') or None),
+            'title': strip_or_none(media.get('name') or media.get('title')),
+            'description': strip_or_none(media.get('desc')) or None,
             'thumbnails': thumbnails,
-            'uploader': strip_or_none(media.get('author') or None),
+            'uploader': strip_or_none(media.get('author')) or None,
             'upload_date': unified_strdate(media.get('date')),
             'duration': parse_duration(media.get('length')),
-            'subtitles': subtitles,
-            **relinker_info
-        }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        content_item_id = None
-
-        content_item_url = self._html_search_meta(
-            ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
-             'twitter:player', 'jsonlink'), webpage, default=None)
-        if content_item_url:
-            content_item_id = self._search_regex(
-                rf'ContentItem-({self._UUID_RE})', content_item_url,
-                'content item id', default=None)
-
-        if not content_item_id:
-            content_item_id = self._search_regex(
-                rf'''(?x)
-                    (?:
-                        (?:initEdizione|drawMediaRaiTV)\(|
-                        <(?:[^>]+\bdata-id|var\s+uniquename)=|
-                        <iframe[^>]+\bsrc=
-                    )
-                    (["\'])
-                    (?:(?!\1).)*\bContentItem-(?P<id>{self._UUID_RE})
-                ''',
-                webpage, 'content item id', default=None, group='id')
-
-        content_item_ids = set()
-        if content_item_id:
-            content_item_ids.add(content_item_id)
-        if video_id not in content_item_ids:
-            content_item_ids.add(video_id)
-
-        for content_item_id in content_item_ids:
-            try:
-                return self._extract_from_content_id(content_item_id, url)
-            except GeoRestrictedError:
-                raise
-            except ExtractorError:
-                pass
-
-        relinker_url = self._proto_relative_url(self._search_regex(
-            r'''(?x)
-                (?:
-                    var\s+videoURL|
-                    mediaInfo\.mediaUri
-                )\s*=\s*
-                ([\'"])
-                (?P<url>
-                    (?:https?:)?
-                    //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
-                    (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
-            ''',
-            webpage, 'relinker URL', group='url'))
-
-        relinker_info = self._extract_relinker_info(
-            urljoin(url, relinker_url), video_id)
-
-        title = self._search_regex(
-            r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
-            webpage, 'title', group='title',
-            default=None) or self._og_search_title(webpage)
-
-        return {
-            'id': video_id,
-            'title': title,
+            'subtitles': self._extract_subtitles(url, media),
             **relinker_info
         }

@@ -726,7 +638,8 @@ class RaiNewsIE(RaiIE):  # XXX: Do not subclass from concrete IE
             'duration': 1589,
             'upload_date': '20220529',
             'uploader': 'rainews',
-        }
+        },
+        'params': {'skip_download': True},
     }, {
         # old content with fallback method to extract media urls
         'url': 'https://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
@@ -739,12 +652,14 @@ class RaiNewsIE(RaiIE):  # XXX: Do not subclass from concrete IE
             'duration': 833,
             'upload_date': '20161103'
         },
+        'params': {'skip_download': True},
         'expected_warnings': ['unable to extract player_data'],
     }, {
         # iframe + drm
         'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html',
         'only_matching': True,
     }]
+    _PLAYER_TAG = 'news'

     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -752,8 +667,8 @@ def _real_extract(self, url):
         webpage = self._download_webpage(url, video_id)

         player_data = self._search_json(
-            r'<rainews-player\s*data=\'', webpage, 'player_data', video_id,
-            transform_source=clean_html, fatal=False)
+            rf'<rai{self._PLAYER_TAG}-player\s*data=\'', webpage, 'player_data', video_id,
+            transform_source=clean_html, default={})
         track_info = player_data.get('track_info')
         relinker_url = traverse_obj(player_data, 'mediapolis', 'content_url')

@@ -770,16 +685,36 @@ def _real_extract(self, url):

         return {
             'id': video_id,
-            'title': track_info.get('title') or self._og_search_title(webpage),
+            'title': player_data.get('title') or track_info.get('title') or self._og_search_title(webpage),
             'upload_date': unified_strdate(track_info.get('date')),
             'uploader': strip_or_none(track_info.get('editor') or None),
             **relinker_info
         }


-class RaiSudtirolIE(RaiBaseIE):
-    _VALID_URL = r'https?://raisudtirol\.rai\.it/.+?media=(?P<id>[TP]tv\d+)'
+class RaiCulturaIE(RaiNewsIE):  # XXX: Do not subclass from concrete IE
+    _VALID_URL = rf'https?://(www\.)?raicultura\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
+    _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
     _TESTS = [{
+        'url': 'https://www.raicultura.it/letteratura/articoli/2018/12/Alberto-Asor-Rosa-Letteratura-e-potere-05ba8775-82b5-45c5-a89d-dd955fbde1fb.html',
+        'info_dict': {
+            'id': '05ba8775-82b5-45c5-a89d-dd955fbde1fb',
+            'ext': 'mp4',
+            'title': 'Alberto Asor Rosa: Letteratura e potere',
+            'duration': 1756,
+            'upload_date': '20181206',
+            'uploader': 'raicultura',
+            'formats': 'count:2',
+        },
+        'params': {'skip_download': True},
+    }]
+    _PLAYER_TAG = 'cultura'


+class RaiSudtirolIE(RaiBaseIE):
+    _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)'
+    _TESTS = [{
+        # mp4 file
         'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
         'info_dict': {
             'id': 'Ptv1619729460',
@@ -787,34 +722,62 @@ class RaiSudtirolIE(RaiBaseIE):
             'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51',
             'series': 'Euro: trasmisciun d\'economia',
             'upload_date': '20210429',
-            'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+?\.jpg',
+            'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+\.jpg',
             'uploader': 'raisudtirol',
-        }
+            'formats': 'count:1',
+        },
+        'params': {'skip_download': True},
+    }, {
+        # m3u manifest
+        'url': 'https://raisudtirol.rai.it/it/kidsplayer.php?lang=it&media=GUGGUG_P1.smil',
+        'info_dict': {
+            'id': 'GUGGUG_P1',
+            'ext': 'mp4',
+            'title': 'GUGGUG! La Prospettiva - Die Perspektive',
+            'uploader': 'raisudtirol',
+            'formats': 'count:6',
+        },
+        'params': {'skip_download': True},
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

-        video_date = self._html_search_regex(r'<span class="med_data">(.+?)</span>', webpage, 'video_date', fatal=False)
-        video_title = self._html_search_regex(r'<span class="med_title">(.+?)</span>', webpage, 'video_title', fatal=False)
-        video_url = self._html_search_regex(r'sources:\s*\[\{file:\s*"(.+?)"\}\]', webpage, 'video_url')
-        video_thumb = self._html_search_regex(r'image: \'(.+?)\'', webpage, 'video_thumb', fatal=False)
+        video_date = self._html_search_regex(
+            r'<span class="med_data">(.+?)</span>', webpage, 'video_date', default=None)
+        video_title = self._html_search_regex([
+            r'<span class="med_title">(.+?)</span>', r'title: \'(.+?)\','],
+            webpage, 'video_title', default=None)
+        video_url = self._html_search_regex([
+            r'sources:\s*\[\{file:\s*"(.+?)"\}\]',
+            r'<source\s+src="(.+?)"\s+type="application/x-mpegURL"'],
+            webpage, 'video_url', default=None)

-        return {
-            'id': video_id,
-            'title': join_nonempty(video_title, video_date, delim=' - '),
-            'series': video_title,
-            'upload_date': unified_strdate(video_date),
-            'thumbnail': urljoin('https://raisudtirol.rai.it/', video_thumb),
-            'uploader': 'raisudtirol',
-            'formats': [{
+        ext = determine_ext(video_url)
+        if ext == 'm3u8':
+            formats = self._extract_m3u8_formats(video_url, video_id)
+        elif ext == 'mp4':
+            formats = [{
                 'format_id': 'https-mp4',
                 'url': self._proto_relative_url(video_url),
                 'width': 1024,
                 'height': 576,
                 'fps': 25,
-                'vcodec': 'h264',
-                'acodec': 'aac',
-            }],
+                'vcodec': 'avc1',
+                'acodec': 'mp4a',
+            }]
+        else:
+            formats = []
+            self.raise_no_formats(f'Unrecognized media file: {video_url}')
+
+        return {
+            'id': video_id,
+            'title': join_nonempty(video_title, video_date, delim=' - '),
+            'series': video_title if video_date else None,
+            'upload_date': unified_strdate(video_date),
+            'thumbnail': urljoin('https://raisudtirol.rai.it/', self._html_search_regex(
+                r'image: \'(.+?)\'', webpage, 'video_thumb', default=None)),
+            'uploader': 'raisudtirol',
+            'formats': formats,
         }
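
The percentage() helper introduced in _create_http_urls above replaces the old floor-to-hundreds bitrate comparison with a tolerance check: the candidate tbr must lie within ±20% of the target, with the absolute spread capped at 125. A standalone copy of the predicate with a few sanity checks (the sample values are invented for illustration):

import math  # kept for parity with get_format_info, which imports math locally

def percentage(number, target, pc=20, roof=125):
    # same predicate as in _create_http_urls: target must be within
    # number +/- pc percent, but never further away than `roof`
    if not number or number < 0:
        return False
    return abs(target - number) < min(float(number) * float(pc) / 100.0, roof)

assert percentage(1200, 1150)      # |50| < min(240, 125) -> matches
assert not percentage(1200, 1400)  # |200| exceeds the 125 cap
assert not percentage(0, 100)      # falsy/negative bitrates never match
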
43 yt_dlp/extractor/recurbate.py Normal file
@@ -0,0 +1,43 @@
+import urllib.error
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, merge_dicts
+
+
+class RecurbateIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?recurbate\.com/play\.php\?video=(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://recurbate.com/play.php?video=39161415',
+        'md5': 'dd2b4ec57aa3e3572cb5cf0997fca99f',
+        'info_dict': {
+            'id': '39161415',
+            'ext': 'mp4',
+            'description': 'md5:db48d09e4d93fc715f47fd3d6b7edd51',
+            'title': 'Performer zsnicole33 show on 2022-10-25 20:23, Chaturbate Archive – Recurbate',
+            'age_limit': 18,
+        },
+        'skip': 'Website require membership.',
+    }]
+
+    def _real_extract(self, url):
+        SUBSCRIPTION_MISSING_MESSAGE = 'This video is only available for registered users; Set your authenticated browser user agent via the --user-agent parameter.'
+        video_id = self._match_id(url)
+        try:
+            webpage = self._download_webpage(url, video_id)
+        except ExtractorError as e:
+            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
+                self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
+            raise
+        token = self._html_search_regex(r'data-token="([^"]+)"', webpage, 'token')
+        video_url = f'https://recurbate.com/api/get.php?video={video_id}&token={token}'
+
+        video_webpage = self._download_webpage(video_url, video_id)
+        if video_webpage == 'shall_subscribe':
+            self.raise_login_required(msg=SUBSCRIPTION_MISSING_MESSAGE, method='cookies')
+        entries = self._parse_html5_media_entries(video_url, video_webpage, video_id)
+        return merge_dicts({
+            'id': video_id,
+            'title': self._html_extract_title(webpage, 'title'),
+            'description': self._og_search_description(webpage),
+            'age_limit': self._rta_search(webpage),
+        }, entries[0])
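
A hedged sketch of how the new Recurbate extractor is meant to be driven: the site gates playback behind a membership, so, per the extractor's own error message, you supply the cookies and User-Agent of an authenticated browser session. The cookie file path and UA string below are placeholders, not values from this commit:

import yt_dlp

opts = {
    'cookiefile': 'cookies.txt',  # cookies exported from a logged-in browser
    'http_headers': {'User-Agent': 'Mozilla/5.0 (placeholder; match your browser)'},
}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://recurbate.com/play.php?video=39161415', download=False)
    print(info['id'], info.get('title'))
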
yt_dlp/extractor/rottentomatoes.py
@@ -1,30 +1,80 @@
 from .common import InfoExtractor
-from .internetvideoarchive import InternetVideoArchiveIE
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    float_or_none,
+    get_element_by_class,
+    join_nonempty,
+    traverse_obj,
+    url_or_none,
+)


 class RottenTomatoesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/(?P<playlist>[^/]+)(?:/(?P<tr>trailers)(?:/(?P<id>\w+))?)?'

-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
         'info_dict': {
             'id': '11028566',
             'ext': 'mp4',
             'title': 'Toy Story 3',
-            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
-            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.'
         },
-    }
+        'skip': 'No longer available',
+    }, {
+        'url': 'https://www.rottentomatoes.com/m/toy_story_3/trailers/VycaVoBKhGuk',
+        'info_dict': {
+            'id': 'VycaVoBKhGuk',
+            'ext': 'mp4',
+            'title': 'Toy Story 3: Trailer 2',
+            'description': '',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 149.941
+        },
+    }, {
+        'url': 'http://www.rottentomatoes.com/m/toy_story_3',
+        'info_dict': {
+            'id': 'toy_story_3',
+            'title': 'Toy Story 3',
+        },
+        'playlist_mincount': 4,
+    }, {
+        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers',
+        'info_dict': {
+            'id': 'toy_story_3-trailers',
+        },
+        'playlist_mincount': 5,
+    }]
+
+    def _extract_videos(self, data, display_id):
+        for video in traverse_obj(data, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls')):
+            yield {
+                'formats': self._extract_m3u8_formats(
+                    video['file'], display_id, 'mp4', m3u8_id='hls', fatal=False),
+                **traverse_obj(video, {
+                    'id': 'publicId',
+                    'title': 'title',
+                    'description': 'description',
+                    'duration': ('durationInSeconds', {float_or_none}),
+                    'thumbnail': ('image', {url_or_none}),
+                }),
+            }

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id')
+        playlist_id, trailers, video_id = self._match_valid_url(url).group('playlist', 'tr', 'id')
+        playlist_id = join_nonempty(playlist_id, trailers)
+        webpage = self._download_webpage(url, playlist_id)
+        data = self._search_json(
+            r'<script[^>]+\bid=["\'](?:heroV|v)ideos["\'][^>]*>', webpage,
+            'data', playlist_id, contains_pattern=r'\[{(?s:.+)}\]')

-        return {
-            '_type': 'url_transparent',
-            'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id,
-            'ie_key': InternetVideoArchiveIE.ie_key(),
-            'id': video_id,
-            'title': self._og_search_title(webpage),
-        }
+        if video_id:
+            video_data = traverse_obj(data, lambda _, v: v['publicId'] == video_id)
+            if not video_data:
+                raise ExtractorError('Unable to extract video from webpage')
+            return next(self._extract_videos(video_data, video_id))
+
+        return self.playlist_result(
+            self._extract_videos(data, playlist_id), playlist_id,
+            clean_html(get_element_by_class('scoreboard__title', webpage)))
|
||||||
|
|
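The _extract_videos helper above leans on traverse_obj's callable-filter idiom, which is easy to misread. A tiny self-contained sketch (the sample dicts are invented for illustration; only the filtering behaviour is the point):

    from yt_dlp.utils import traverse_obj

    # Invented sample data mimicking the shape of the page's videos JSON
    videos = [
        {'publicId': 'VycaVoBKhGuk', 'file': 'https://example.com/a.m3u8', 'type': 'hls'},
        {'publicId': None, 'file': 'https://example.com/b.m3u8', 'type': 'hls'},  # dropped: no publicId
        {'publicId': 'xyz', 'file': 'https://example.com/c.mp4', 'type': 'mp4'},  # dropped: not hls
    ]

    # A callable in the traversal path acts as an (index, value) predicate over list items
    hls = traverse_obj(videos, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls'))
    assert [v['publicId'] for v in hls] == ['VycaVoBKhGuk']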
yt_dlp/extractor/tvplay.py
@@ -30,10 +30,7 @@ class TVPlayIE(InfoExtractor):
                     (?:
                         tvplay(?:\.skaties)?\.lv(?:/parraides)?|
                         (?:tv3play|play\.tv3)\.lt(?:/programos)?|
-                        tv3play(?:\.tv3)?\.ee/sisu|
-                        (?:tv(?:3|6|8|10)play)\.se/program|
-                        (?:(?:tv3play|viasat4play|tv6play)\.no|(?:tv3play)\.dk)/programmer|
-                        play\.nova(?:tv)?\.bg/programi
+                        tv3play(?:\.tv3)?\.ee/sisu
                     )
                     /(?:[^/]+/)+
                 )
@@ -92,117 +89,6 @@ class TVPlayIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        {
-            'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
-            'info_dict': {
-                'id': '395385',
-                'ext': 'mp4',
-                'title': 'Husräddarna S02E07',
-                'description': 'md5:f210c6c89f42d4fc39faa551be813777',
-                'duration': 2574,
-                'timestamp': 1400596321,
-                'upload_date': '20140520',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
-            'info_dict': {
-                'id': '266636',
-                'ext': 'mp4',
-                'title': 'Den sista dokusåpan S01E08',
-                'description': 'md5:295be39c872520221b933830f660b110',
-                'duration': 1492,
-                'timestamp': 1330522854,
-                'upload_date': '20120229',
-                'age_limit': 18,
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
-            'info_dict': {
-                'id': '282756',
-                'ext': 'mp4',
-                'title': 'Antikjakten S01E10',
-                'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
-                'duration': 2646,
-                'timestamp': 1348575868,
-                'upload_date': '20120925',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
-            'info_dict': {
-                'id': '230898',
-                'ext': 'mp4',
-                'title': 'Anna Anka søker assistent - Ep. 8',
-                'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
-                'duration': 2656,
-                'timestamp': 1277720005,
-                'upload_date': '20100628',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
-            'info_dict': {
-                'id': '21873',
-                'ext': 'mp4',
-                'title': 'Budbringerne program 10',
-                'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
-                'duration': 1297,
-                'timestamp': 1254205102,
-                'upload_date': '20090929',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
-            'info_dict': {
-                'id': '361883',
-                'ext': 'mp4',
-                'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
-                'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
-                'duration': 2594,
-                'timestamp': 1393236292,
-                'upload_date': '20140224',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true',
-            'info_dict': {
-                'id': '624952',
-                'ext': 'flv',
-                'title': 'Здравей, България (12.06.2015 г.) ',
-                'description': 'md5:99f3700451ac5bb71a260268b8daefd7',
-                'duration': 8838,
-                'timestamp': 1434100372,
-                'upload_date': '20150612',
-            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
-        },
-        {
-            'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
-            'only_matching': True,
-        },
         {
             'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
             'only_matching': True,
@@ -327,103 +213,6 @@ def _real_extract(self, url):
         }


-class ViafreeIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:www\.)?
-                        viafree\.(?P<country>dk|no|se|fi)
-                        /(?P<id>(?:program(?:mer)?|ohjelmat)?/(?:[^/]+/)+[^/?#&]+)
-                    '''
-    _TESTS = [{
-        'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
-        'info_dict': {
-            'id': '757786',
-            'ext': 'mp4',
-            'title': 'Det beste vorspielet - Sesong 2 - Episode 1',
-            'description': 'md5:b632cb848331404ccacd8cd03e83b4c3',
-            'series': 'Det beste vorspielet',
-            'season_number': 2,
-            'duration': 1116,
-            'timestamp': 1471200600,
-            'upload_date': '20160814',
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://www.viafree.dk/programmer/humor/comedy-central-roast-of-charlie-sheen/film/1047660',
-        'info_dict': {
-            'id': '1047660',
-            'ext': 'mp4',
-            'title': 'Comedy Central Roast of Charlie Sheen - Comedy Central Roast of Charlie Sheen',
-            'description': 'md5:ec956d941ae9fd7c65a48fd64951dc6d',
-            'series': 'Comedy Central Roast of Charlie Sheen',
-            'season_number': 1,
-            'duration': 3747,
-            'timestamp': 1608246060,
-            'upload_date': '20201217'
-        },
-        'params': {
-            'skip_download': True
-        }
-    }, {
-        # with relatedClips
-        'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
-        'only_matching': True,
-    }, {
-        # Different og:image URL schema
-        'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.viafree.fi/ohjelmat/entertainment/amazing-makeovers/kausi-7/jakso-2',
-        'only_matching': True,
-    }]
-    _GEO_BYPASS = False
-
-    def _real_extract(self, url):
-        country, path = self._match_valid_url(url).groups()
-        content = self._download_json(
-            'https://viafree-content.mtg-api.com/viafree-content/v1/%s/path/%s' % (country, path), path)
-        program = content['_embedded']['viafreeBlocks'][0]['_embedded']['program']
-        guid = program['guid']
-        meta = content['meta']
-        title = meta['title']
-
-        try:
-            stream_href = self._download_json(
-                program['_links']['streamLink']['href'], guid,
-                headers=self.geo_verification_headers())['embedded']['prioritizedStreams'][0]['links']['stream']['href']
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self.raise_geo_restricted(countries=[country])
-            raise
-
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_href, guid, 'mp4')
-        episode = program.get('episode') or {}
-        return {
-            'id': guid,
-            'title': title,
-            'thumbnail': meta.get('image'),
-            'description': meta.get('description'),
-            'series': episode.get('seriesTitle'),
-            'subtitles': subtitles,
-            'episode_number': int_or_none(episode.get('episodeNumber')),
-            'season_number': int_or_none(episode.get('seasonNumber')),
-            'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000),
-            'timestamp': parse_iso8601(try_get(program, lambda x: x['availability']['start'])),
-            'formats': formats,
-        }
-
-
 class TVPlayHomeIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
yt_dlp/extractor/twitch.py
@@ -41,7 +41,6 @@ class TwitchBaseIE(InfoExtractor):
     _USHER_BASE = 'https://usher.ttvnw.net'
     _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
     _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
-    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
     _NETRC_MACHINE = 'twitch'

     _OPERATION_HASHES = {
@@ -58,6 +57,11 @@ class TwitchBaseIE(InfoExtractor):
         'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
     }

+    @property
+    def _CLIENT_ID(self):
+        return self._configuration_arg(
+            'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key=TwitchStreamIE, casesense=True)[0]
+
     def _perform_login(self, username, password):
         def fail(message):
             raise ExtractorError(
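With the hardcoded _CLIENT_ID replaced by the property above, the client ID becomes user-overridable through yt-dlp's extractor-args mechanism. A minimal sketch via the Python API; the 'twitchstream' namespace is an assumption derived from the ie_key=TwitchStreamIE lookup, the ID shown is merely the previously hardcoded value, and the URL is a placeholder:

    import yt_dlp

    ydl_opts = {
        # extractor_args maps a lowercased IE key to {arg_name: [values]}
        'extractor_args': {'twitchstream': {'client_id': ['kimne78kx3ncx6brgo4mv6wki5h1ko']}},
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info('https://www.twitch.tv/videos/635475444', download=False)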
@@ -194,7 +198,8 @@ class TwitchVodIE(TwitchBaseIE):
                     https?://
                         (?:
                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
-                            player\.twitch\.tv/\?.*?\bvideo=v?
+                            player\.twitch\.tv/\?.*?\bvideo=v?|
+                            www\.twitch\.tv/[^/]+/schedule\?vodID=
                         )
                         (?P<id>\d+)
                     '''
@@ -363,6 +368,9 @@ class TwitchVodIE(TwitchBaseIE):
             'skip_download': True
         },
         'expected_warnings': ['Unable to download JSON metadata: HTTP Error 403: Forbidden']
+    }, {
+        'url': 'https://www.twitch.tv/tangotek/schedule?vodID=1822395420',
+        'only_matching': True,
     }]

     def _download_info(self, item_id):
@@ -1075,7 +1083,7 @@ class TwitchClipsIE(TwitchBaseIE):
                     https?://
                         (?:
                             clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
-                            (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
+                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/)?clip/
                         )
                         (?P<id>[^/?#&]+)
                     '''
@@ -1111,6 +1119,9 @@ class TwitchClipsIE(TwitchBaseIE):
     }, {
         'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
         'only_matching': True,
+    }, {
+        'url': 'https://m.twitch.tv/clip/FaintLightGullWholeWheat',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
yt_dlp/extractor/twitter.py
@@ -705,6 +705,7 @@ class TwitterIE(TwitterBaseIE):
             'uploader': r're:Monique Camarra.+?',
             'uploader_id': 'MoniqueCamarra',
             'live_status': 'was_live',
+            'release_timestamp': 1658417414,
             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
             'timestamp': 1658407771464,
         },
@@ -1327,6 +1328,8 @@ def _real_extract(self, url):
             'uploader_id': traverse_obj(
                 metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
             'live_status': live_status,
+            'release_timestamp': try_call(
+                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
             'timestamp': metadata.get('created_at'),
             'formats': formats,
         }
yt_dlp/extractor/unsupported.py
@@ -131,8 +131,9 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
     URLS = (
         r'dood\.(?:to|watch|so|pm|wf|re)',
         # Sites youtube-dl supports, but we won't
-        r'https://viewsb\.com',
-        r'https://filemoon\.sx',
+        r'viewsb\.com',
+        r'filemoon\.sx',
+        r'hentai\.animestigma\.com',
     )

     _TESTS = [{
yt_dlp/extractor/vrt.py
@@ -1,45 +1,137 @@
-from .common import InfoExtractor
+import functools
+import json
+import time
+import urllib.error
+import urllib.parse
+
+from .gigya import GigyaBaseIE
 from ..utils import (
+    ExtractorError,
+    clean_html,
     extract_attributes,
     float_or_none,
     get_element_by_class,
+    get_element_html_by_class,
+    int_or_none,
+    join_nonempty,
+    jwt_encode_hs256,
+    make_archive_id,
+    parse_age_limit,
+    parse_iso8601,
+    str_or_none,
     strip_or_none,
-    unified_timestamp,
+    traverse_obj,
+    url_or_none,
+    urlencode_postdata,
 )


-class VRTIE(InfoExtractor):
+class VRTBaseIE(GigyaBaseIE):
+    _GEO_BYPASS = False
+    _PLAYER_INFO = {
+        'platform': 'desktop',
+        'app': {
+            'type': 'browser',
+            'name': 'Chrome',
+        },
+        'device': 'undefined (undefined)',
+        'os': {
+            'name': 'Windows',
+            'version': 'x86_64'
+        },
+        'player': {
+            'name': 'VRT web player',
+            'version': '2.7.4-prod-2023-04-19T06:05:45'
+        }
+    }
+    # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.fd1de01a40a1e3d842ea.js
+    _JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
+    _JWT_SIGNING_KEY = '2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae'
+
+    def _extract_formats_and_subtitles(self, data, video_id):
+        if traverse_obj(data, 'drm'):
+            self.report_drm(video_id)
+
+        formats, subtitles = [], {}
+        for target in traverse_obj(data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
+            format_type = target['type'].upper()
+            format_url = target['url']
+            if format_type in ('HLS', 'HLS_AES'):
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            elif format_type == 'HDS':
+                formats.extend(self._extract_f4m_formats(
+                    format_url, video_id, f4m_id=format_type, fatal=False))
+            elif format_type == 'MPEG_DASH':
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    format_url, video_id, mpd_id=format_type, fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            elif format_type == 'HSS':
+                fmts, subs = self._extract_ism_formats_and_subtitles(
+                    format_url, video_id, ism_id='mss', fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
+            else:
+                formats.append({
+                    'format_id': format_type,
+                    'url': format_url,
+                })
+
+        for sub in traverse_obj(data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')):
+            subtitles.setdefault('nl', []).append({'url': sub['url']})
+
+        return formats, subtitles
+
+    def _call_api(self, video_id, client='null', id_token=None, version='v2'):
+        player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO}
+        player_token = self._download_json(
+            'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
+            video_id, 'Downloading player token', headers={
+                **self.geo_verification_headers(),
+                'Content-Type': 'application/json',
+            }, data=json.dumps({
+                'identityToken': id_token or {},
+                'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
+                    'kid': self._JWT_KEY_ID
+                }).decode()
+            }, separators=(',', ':')).encode())['vrtPlayerToken']
+
+        return self._download_json(
+            f'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}',
+            video_id, 'Downloading API JSON', query={
+                'vrtPlayerToken': player_token,
+                'client': client,
+            }, expected_status=400)
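For reference, the playerInfo value built in _call_api above is an ordinary HS256 JWT: jwt_encode_hs256 is yt-dlp's plain JWT encoder, so a standard-library-only sketch of roughly the same construction looks like this (key and kid strings are deliberately truncated here, the payload is _PLAYER_INFO plus the 'exp' claim, and exact base64 variant/padding details may differ from the utility):

    import base64
    import hashlib
    import hmac
    import json
    import time

    def _b64url(data: bytes) -> str:
        return base64.urlsafe_b64encode(data).rstrip(b'=').decode()

    def jwt_hs256(payload: dict, key: str, kid: str) -> str:
        # Header mirrors jwt_encode_hs256: alg/typ plus any extra headers such as 'kid'
        header = {'alg': 'HS256', 'typ': 'JWT', 'kid': kid}
        signing_input = '.'.join(
            _b64url(json.dumps(part, separators=(',', ':')).encode()) for part in (header, payload))
        signature = hmac.new(key.encode(), signing_input.encode(), hashlib.sha256).digest()
        return f'{signing_input}.{_b64url(signature)}'

    player_info = {'exp': round(time.time(), 3) + 900, 'platform': 'desktop'}  # trimmed _PLAYER_INFO
    print(jwt_hs256(player_info, '2a9251d7...', '0-0Fp51U...'))  # keys truncated on purpose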
+
+
+class VRTIE(VRTBaseIE):
     IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
     _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
     _TESTS = [{
         'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
-        'md5': 'e1663accf5cf13f375f3cd0d10476669',
         'info_dict': {
             'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
             'ext': 'mp4',
             'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
-            'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
-            'timestamp': 1557924660,
-            'upload_date': '20190515',
+            'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff',
             'duration': 31.2,
+            'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg',
         },
+        'params': {'skip_download': 'm3u8'},
     }, {
         'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
-        'md5': '910bba927566e9ab992278f647eb4b75',
         'info_dict': {
             'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
             'ext': 'mp4',
-            'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
-            'timestamp': 1557923760,
-            'upload_date': '20190515',
+            'title': 'De Belgian Cats zijn klaar voor het EK',
+            'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal',
             'duration': 115.17,
+            'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg',
         },
-    }, {
-        'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
-        'only_matching': True,
+        'params': {'skip_download': 'm3u8'},
     }]
     _CLIENT_MAP = {
         'vrt.be/vrtnws': 'vrtnieuws',
@@ -49,34 +141,285 @@ class VRTIE(InfoExtractor):
     def _real_extract(self, url):
         site, display_id = self._match_valid_url(url).groups()
         webpage = self._download_webpage(url, display_id)
-        attrs = extract_attributes(self._search_regex(
-            r'(<[^>]+class="vrtvideo( [^"]*)?"[^>]*>)', webpage, 'vrt video'))
+        attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '')

-        asset_id = attrs['data-video-id']
-        publication_id = attrs.get('data-publication-id')
+        asset_id = attrs.get('data-video-id') or attrs['data-videoid']
+        publication_id = traverse_obj(attrs, 'data-publication-id', 'data-publicationid')
         if publication_id:
-            asset_id = publication_id + '$' + asset_id
-        client = attrs.get('data-client-code') or self._CLIENT_MAP[site]
+            asset_id = f'{publication_id}${asset_id}'
+        client = traverse_obj(attrs, 'data-client-code', 'data-client') or self._CLIENT_MAP[site]
+
+        data = self._call_api(asset_id, client)
+        formats, subtitles = self._extract_formats_and_subtitles(data, asset_id)

-        title = strip_or_none(get_element_by_class(
-            'vrt-title', webpage) or self._html_search_meta(
-            ['og:title', 'twitter:title', 'name'], webpage))
         description = self._html_search_meta(
             ['og:description', 'twitter:description', 'description'], webpage)
         if description == '…':
             description = None
-        timestamp = unified_timestamp(self._html_search_meta(
-            'article:published_time', webpage))

         return {
-            '_type': 'url_transparent',
             'id': asset_id,
-            'display_id': display_id,
-            'title': title,
+            'formats': formats,
+            'subtitles': subtitles,
             'description': description,
-            'thumbnail': attrs.get('data-posterimage'),
-            'timestamp': timestamp,
+            'thumbnail': url_or_none(attrs.get('data-posterimage')),
             'duration': float_or_none(attrs.get('data-duration'), 1000),
-            'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client, asset_id),
-            'ie_key': 'Canvas',
+            '_old_archive_ids': [make_archive_id('Canvas', asset_id)],
+            **traverse_obj(data, {
+                'title': ('title', {str}),
+                'description': ('shortDescription', {str}),
+                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
+                'thumbnail': ('posterImageUrl', {url_or_none}),
+            }),
         }
+
+
+class VrtNUIE(VRTBaseIE):
+    IE_DESC = 'VRT MAX'
+    _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        # CONTENT_IS_AGE_RESTRICTED
+        'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/',
+        'info_dict': {
+            'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f',
+            'ext': 'mp4',
+            'title': 'Tom Waes',
+            'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.',
+            'timestamp': 1673905125,
+            'release_timestamp': 1673905125,
+            'series': 'De ideale wereld',
+            'season_id': '1672830988794',
+            'episode': 'Aflevering 1',
+            'episode_number': 1,
+            'episode_id': '1672830988861',
+            'display_id': 'de-ideale-wereld-d20230116',
+            'channel': 'VRT',
+            'duration': 1939.0,
+            'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg',
+            'release_date': '20230116',
+            'upload_date': '20230116',
+            'age_limit': 12,
+        },
+    }, {
+        'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/',
+        'info_dict': {
+            'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee',
+            'ext': 'mp4',
+            'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'',
+            'description': 'md5:197424726c61384b4e5c519f16c0cf02',
+            'timestamp': 1652940000,
+            'release_timestamp': 1652940000,
+            'series': 'Buurman, wat doet u nu?',
+            'season': 'Seizoen 6',
+            'season_number': 6,
+            'season_id': '1652344200907',
+            'episode': 'Aflevering 0',
+            'episode_number': 0,
+            'episode_id': '1652951873524',
+            'display_id': 'buurman--wat-doet-u-nu--s6-trailer',
+            'channel': 'VRT',
+            'duration': 33.13,
+            'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg',
+            'release_date': '20220519',
+            'upload_date': '20220519',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+    _NETRC_MACHINE = 'vrtnu'
+    _authenticated = False
+
+    def _perform_login(self, username, password):
+        auth_info = self._gigya_login({
+            'APIKey': '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy',
+            'targetEnv': 'jssdk',
+            'loginID': username,
+            'password': password,
+            'authMode': 'cookie',
+        })
+
+        if auth_info.get('errorDetails'):
+            raise ExtractorError(f'Unable to login. VrtNU said: {auth_info["errorDetails"]}', expected=True)
+
+        # Sometimes authentication fails for no good reason, retry
+        for retry in self.RetryManager():
+            if retry.attempt > 1:
+                self._sleep(1, None)
+            try:
+                self._request_webpage(
+                    'https://token.vrt.be/vrtnuinitlogin', None, note='Requesting XSRF Token',
+                    errnote='Could not get XSRF Token', query={
+                        'provider': 'site',
+                        'destination': 'https://www.vrt.be/vrtnu/',
+                    })
+                self._request_webpage(
+                    'https://login.vrt.be/perform_login', None,
+                    note='Performing login', errnote='Login failed',
+                    query={'client_id': 'vrtnu-site'}, data=urlencode_postdata({
+                        'UID': auth_info['UID'],
+                        'UIDSignature': auth_info['UIDSignature'],
+                        'signatureTimestamp': auth_info['signatureTimestamp'],
+                        '_csrf': self._get_cookies('https://login.vrt.be').get('OIDCXSRF').value,
+                    }))
+            except ExtractorError as e:
+                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
+                    retry.error = e
+                    continue
+                raise
+
+        self._authenticated = True
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        parsed_url = urllib.parse.urlparse(url)
+        details = self._download_json(
+            f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json',
+            display_id, 'Downloading asset JSON', 'Unable to download asset JSON')['details']
+
+        watch_info = traverse_obj(details, (
+            'actions', lambda _, v: v['type'] == 'watch-episode', {dict}), get_all=False) or {}
+        video_id = join_nonempty(
+            'episodePublicationId', 'episodeVideoId', delim='$', from_dict=watch_info)
+        if '$' not in video_id:
+            raise ExtractorError('Unable to extract video ID')
+
+        vrtnutoken = self._download_json(
+            'https://token.vrt.be/refreshtoken', video_id, note='Retrieving vrtnutoken',
+            errnote='Token refresh failed')['vrtnutoken'] if self._authenticated else None
+
+        video_info = self._call_api(video_id, 'vrtnu-web@PROD', vrtnutoken)
+
+        if 'title' not in video_info:
+            code = video_info.get('code')
+            if code in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'):
+                self.raise_login_required(code, method='password')
+            elif code in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'):
+                self.raise_geo_restricted(countries=['BE'])
+            elif code == 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS':
+                if not self._authenticated:
+                    self.raise_login_required(code, method='password')
+                self.raise_geo_restricted(countries=['BE'])
+            raise ExtractorError(code, expected=True)
+
+        formats, subtitles = self._extract_formats_and_subtitles(video_info, video_id)
+
+        return {
+            **traverse_obj(details, {
+                'title': 'title',
+                'description': ('description', {clean_html}),
+                'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
+                'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}),
+                'series': ('data', 'program', 'title'),
+                'season': ('data', 'season', 'title', 'value'),
+                'season_number': ('data', 'season', 'title', 'raw', {int_or_none}),
+                'season_id': ('data', 'season', 'id', {str_or_none}),
+                'episode': ('data', 'episode', 'number', 'value', {str_or_none}),
+                'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}),
+                'episode_id': ('data', 'episode', 'id', {str_or_none}),
+                'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}),
+            }),
+            'id': video_id,
+            'display_id': display_id,
+            'channel': 'VRT',
+            'formats': formats,
+            'duration': float_or_none(video_info.get('duration'), 1000),
+            'thumbnail': url_or_none(video_info.get('posterImageUrl')),
+            'subtitles': subtitles,
+            '_old_archive_ids': [make_archive_id('Canvas', video_id)],
+        }
+
+
+class KetnetIE(VRTBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5',
+        'info_dict': {
+            'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
+            'ext': 'mp4',
+            'title': 'Meisjes',
+            'episode': 'Reeks 6: Week 5',
+            'season': 'Reeks 6',
+            'series': 'Meisjes',
+            'timestamp': 1685251800,
+            'upload_date': '20230528',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        video = self._download_json(
+            'https://senior-bff.ketnet.be/graphql', display_id, query={
+                'query': '''{
+  video(id: "content/ketnet/nl/%s.model.json") {
+    description
+    episodeNr
+    imageUrl
+    mediaReference
+    programTitle
+    publicationDate
+    seasonTitle
+    subtitleVideodetail
+    titleVideodetail
+  }
+}''' % display_id,
+            })['data']['video']
+
+        video_id = urllib.parse.unquote(video['mediaReference'])
+        data = self._call_api(video_id, 'ketnet@PROD', version='v1')
+        formats, subtitles = self._extract_formats_and_subtitles(data, video_id)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            '_old_archive_ids': [make_archive_id('Canvas', video_id)],
+            **traverse_obj(video, {
+                'title': ('titleVideodetail', {str}),
+                'description': ('description', {str}),
+                'thumbnail': ('thumbnail', {url_or_none}),
+                'timestamp': ('publicationDate', {parse_iso8601}),
+                'series': ('programTitle', {str}),
+                'season': ('seasonTitle', {str}),
+                'episode': ('subtitleVideodetail', {str}),
+                'episode_number': ('episodeNr', {int_or_none}),
+            }),
+        }
+
+
+class DagelijkseKostIE(VRTBaseIE):
+    IE_DESC = 'dagelijksekost.een.be'
+    _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
+        'info_dict': {
+            'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
+            'ext': 'mp4',
+            'title': 'Hachis parmentier met witloof',
+            'description': 'md5:9960478392d87f63567b5b117688cdc5',
+            'display_id': 'hachis-parmentier-met-witloof',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._html_search_regex(
+            r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
+
+        data = self._call_api(video_id, 'dako@prod', version='v1')
+        formats, subtitles = self._extract_formats_and_subtitles(data, video_id)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            'display_id': display_id,
+            'title': strip_or_none(get_element_by_class(
+                'dish-metadata__title', webpage) or self._html_search_meta('twitter:title', webpage)),
+            'description': clean_html(get_element_by_class(
+                'dish-description', webpage)) or self._html_search_meta(
+                ['description', 'twitter:description', 'og:description'], webpage),
+            '_old_archive_ids': [make_archive_id('Canvas', video_id)],
         }
607  yt_dlp/extractor/weverse.py  Normal file

@@ -0,0 +1,607 @@
import base64
import hashlib
import hmac
import itertools
import json
import re
import time
import urllib.error
import urllib.parse
import uuid

from .common import InfoExtractor
from .naver import NaverBaseIE
from .youtube import YoutubeIE
from ..utils import (
    ExtractorError,
    UserNotLive,
    float_or_none,
    int_or_none,
    str_or_none,
    traverse_obj,
    try_call,
    update_url_query,
    url_or_none,
)


class WeverseBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'weverse'
    _ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api/v2'
    _API_HEADERS = {
        'Referer': 'https://weverse.io/',
        'WEV-device-Id': str(uuid.uuid4()),
    }

    def _perform_login(self, username, password):
        if self._API_HEADERS.get('Authorization'):
            return

        headers = {
            'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
            'x-acc-app-version': '2.2.6',
            'x-acc-language': 'en',
            'x-acc-service-id': 'weverse',
            'x-acc-trace-id': str(uuid.uuid4()),
            'x-clog-user-device-id': str(uuid.uuid4()),
        }
        check_username = self._download_json(
            f'{self._ACCOUNT_API_BASE}/signup/email/status', None,
            note='Checking username', query={'email': username}, headers=headers)
        if not check_username.get('hasPassword'):
            raise ExtractorError('Invalid username provided', expected=True)

        headers['content-type'] = 'application/json'
        try:
            auth = self._download_json(
                f'{self._ACCOUNT_API_BASE}/auth/token/by-credentials', None, data=json.dumps({
                    'email': username,
                    'password': password,
                }, separators=(',', ':')).encode(), headers=headers, note='Logging in')
        except ExtractorError as e:
            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
                raise ExtractorError('Invalid password provided', expected=True)
            raise

        WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {auth["accessToken"]}'

    def _real_initialize(self):
        if self._API_HEADERS.get('Authorization'):
            return

        token = try_call(lambda: self._get_cookies('https://weverse.io/')['we2_access_token'].value)
        if not token:
            self.raise_login_required()

        WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {token}'

    def _call_api(self, ep, video_id, data=None, note='Downloading API JSON'):
        # Ref: https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/2488.a09b41ff.chunk.js
        # From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
        key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
        api_path = update_url_query(ep, {
            'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
            'language': 'en',
            'platform': 'WEB',
            'wpf': 'pc',
        })
        wmsgpad = int(time.time() * 1000)
        wmd = base64.b64encode(hmac.HMAC(
            key, f'{api_path[:255]}{wmsgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()
        headers = {'Content-Type': 'application/json'} if data else {}
        try:
            return self._download_json(
                f'https://global.apis.naver.com/weverse/wevweb{api_path}', video_id, note=note,
                data=data, headers={**self._API_HEADERS, **headers}, query={
                    'wmsgpad': wmsgpad,
                    'wmd': wmd,
                })
        except ExtractorError as e:
            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
                self.raise_login_required(
                    'Session token has expired. Log in again or refresh cookies in browser')
            elif isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
                raise ExtractorError('Your account does not have access to this content', expected=True)
            raise
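Aside: isolated for clarity, the request signing in _call_api above boils down to an HMAC-SHA1 over the first 255 characters of the query path concatenated with a millisecond timestamp. A runnable sketch reusing the same key and appId as the code above:

    import base64
    import hashlib
    import hmac
    import time

    KEY = b'1b9cb6378d959b45714bec49971ade22e6e24e42'

    def sign_api_path(api_path: str) -> dict:
        # Only the first 255 characters of the path take part in the signature
        wmsgpad = int(time.time() * 1000)
        wmd = base64.b64encode(hmac.HMAC(
            KEY, f'{api_path[:255]}{wmsgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()
        return {'wmsgpad': wmsgpad, 'wmd': wmd}

    print(sign_api_path('/post/v1.0/post-1-117229444?fieldSet=postV1&appId=be4d79eb8fc7bd008ee82c8ec4ff6fd4'))

The returned pair becomes the wmsgpad/wmd query parameters on every wevweb API call.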
    def _call_post_api(self, video_id):
        return self._call_api(f'/post/v1.0/post-{video_id}?fieldSet=postV1', video_id)

    def _get_community_id(self, channel):
        return str(self._call_api(
            f'/community/v1.0/communityIdUrlPathByUrlPathArtistCode?keyword={channel}',
            channel, note='Fetching community ID')['communityId'])

    def _get_formats(self, data, video_id):
        formats = traverse_obj(data, ('videos', 'list', lambda _, v: url_or_none(v['source']), {
            'url': 'source',
            'width': ('encodingOption', 'width', {int_or_none}),
            'height': ('encodingOption', 'height', {int_or_none}),
            'vcodec': 'type',
            'vbr': ('bitrate', 'video', {int_or_none}),
            'abr': ('bitrate', 'audio', {int_or_none}),
            'filesize': ('size', {int_or_none}),
            'format_id': ('encodingOption', 'id', {str_or_none}),
        }))

        for stream in traverse_obj(data, ('streams', lambda _, v: v['type'] == 'HLS' and url_or_none(v['source']))):
            query = {}
            for param in traverse_obj(stream, ('keys', lambda _, v: v['type'] == 'param' and v['name'])):
                query[param['name']] = param.get('value', '')
            fmts = self._extract_m3u8_formats(
                stream['source'], video_id, 'mp4', m3u8_id='hls', fatal=False, query=query)
            if query:
                for fmt in fmts:
                    fmt['url'] = update_url_query(fmt['url'], query)
                    fmt['extra_param_to_segment_url'] = urllib.parse.urlencode(query)
            formats.extend(fmts)

        return formats

    def _get_subs(self, caption_url):
        subs_ext_re = r'\.(?:ttml|vtt)'
        replace_ext = lambda x, y: re.sub(subs_ext_re, y, x)
        if re.search(subs_ext_re, caption_url):
            return [replace_ext(caption_url, '.ttml'), replace_ext(caption_url, '.vtt')]
        return [caption_url]

    def _parse_post_meta(self, metadata):
        return traverse_obj(metadata, {
            'title': ((('extension', 'mediaInfo', 'title'), 'title'), {str}),
            'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}),
            'uploader': ('author', 'profileName', {str}),
            'uploader_id': ('author', 'memberId', {str}),
            'creator': ('community', 'communityName', {str}),
            'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
            'duration': ('extension', 'video', 'playTime', {float_or_none}),
            'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
            'release_timestamp': ('extension', 'video', 'onAirStartAt', {lambda x: int_or_none(x, 1000)}),
            'thumbnail': ('extension', (('mediaInfo', 'thumbnail', 'url'), ('video', 'thumb')), {url_or_none}),
            'view_count': ('extension', 'video', 'playCount', {int_or_none}),
            'like_count': ('extension', 'video', 'likeCount', {int_or_none}),
            'comment_count': ('commentCount', {int_or_none}),
        }, get_all=False)

    def _extract_availability(self, data):
        return self._availability(**traverse_obj(data, ((('extension', 'video'), None), {
            'needs_premium': 'paid',
            'needs_subscription': 'membershipOnly',
        }), get_all=False, expected_type=bool), needs_auth=True)

    def _extract_live_status(self, data):
        data = traverse_obj(data, ('extension', 'video', {dict})) or {}
        if data.get('type') == 'LIVE':
            return traverse_obj({
                'ONAIR': 'is_live',
                'DONE': 'post_live',
                'STANDBY': 'is_upcoming',
                'DELAY': 'is_upcoming',
            }, (data.get('status'), {str})) or 'is_live'
        return 'was_live' if data.get('liveToVod') else 'not_live'


class WeverseIE(WeverseBaseIE):
    _VALID_URL = r'https?://(?:www\.|m\.)?weverse.io/(?P<artist>[^/?#]+)/live/(?P<id>[\d-]+)'
    _TESTS = [{
        'url': 'https://weverse.io/billlie/live/0-107323480',
        'md5': '1fa849f00181eef9100d3c8254c47979',
        'info_dict': {
            'id': '0-107323480',
            'ext': 'mp4',
            'title': '행복한 평이루💜',
            'description': '',
            'uploader': 'Billlie',
            'uploader_id': '5ae14aed7b7cdc65fa87c41fe06cc936',
            'channel': 'billlie',
            'channel_id': '72',
            'channel_url': 'https://weverse.io/billlie',
            'creator': 'Billlie',
            'timestamp': 1666262062,
            'upload_date': '20221020',
            'release_timestamp': 1666262058,
            'release_date': '20221020',
            'duration': 3102,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'availability': 'needs_auth',
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://weverse.io/lesserafim/live/2-102331763',
        'md5': 'e46125c08b13a6c8c1f4565035cca987',
        'info_dict': {
            'id': '2-102331763',
            'ext': 'mp4',
            'title': '🎂김채원 생신🎂',
            'description': '🎂김채원 생신🎂',
            'uploader': 'LE SSERAFIM ',
            'uploader_id': 'd26ddc1e258488a0a2b795218d14d59d',
            'channel': 'lesserafim',
            'channel_id': '47',
            'channel_url': 'https://weverse.io/lesserafim',
            'creator': 'LE SSERAFIM',
            'timestamp': 1659353400,
            'upload_date': '20220801',
            'release_timestamp': 1659353400,
            'release_date': '20220801',
            'duration': 3006,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'availability': 'needs_auth',
            'live_status': 'was_live',
            'subtitles': {
                'id_ID': 'count:2',
                'en_US': 'count:2',
                'es_ES': 'count:2',
                'vi_VN': 'count:2',
                'th_TH': 'count:2',
                'zh_CN': 'count:2',
                'zh_TW': 'count:2',
                'ja_JP': 'count:2',
                'ko_KR': 'count:2',
            },
        },
    }, {
        'url': 'https://weverse.io/treasure/live/2-117230416',
        'info_dict': {
            'id': '2-117230416',
            'ext': 'mp4',
            'title': r're:스껄도려님 첫 스무살 생파🦋',
            'description': '',
            'uploader': 'TREASURE',
            'uploader_id': '77eabbc449ca37f7970054a136f60082',
            'channel': 'treasure',
            'channel_id': '20',
            'channel_url': 'https://weverse.io/treasure',
            'creator': 'TREASURE',
            'timestamp': 1680667651,
            'upload_date': '20230405',
            'release_timestamp': 1680667639,
            'release_date': '20230405',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'availability': 'needs_auth',
            'live_status': 'is_live',
        },
        'skip': 'Livestream has ended',
    }]

    def _real_extract(self, url):
        channel, video_id = self._match_valid_url(url).group('artist', 'id')
        post = self._call_post_api(video_id)
        api_video_id = post['extension']['video']['videoId']
        availability = self._extract_availability(post)
        live_status = self._extract_live_status(post)
        video_info, formats = {}, []

        if live_status == 'is_upcoming':
            self.raise_no_formats('Livestream has not yet started', expected=True)

        elif live_status == 'is_live':
            video_info = self._call_api(
                f'/video/v1.0/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
                video_id, note='Downloading live JSON')
            playback = self._parse_json(video_info['lipPlayback'], video_id)
            m3u8_url = traverse_obj(playback, (
                'media', lambda _, v: v['protocol'] == 'HLS', 'path', {url_or_none}), get_all=False)
            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)

        elif live_status == 'post_live':
            if availability in ('premium_only', 'subscriber_only'):
                self.report_drm(video_id)
            self.raise_no_formats(
                'Livestream has ended and downloadable VOD is not available', expected=True)

        else:
            infra_video_id = post['extension']['video']['infraVideoId']
            in_key = self._call_api(
                f'/video/v1.0/vod/{api_video_id}/inKey?preview=false', video_id,
                data=b'{}', note='Downloading VOD API key')['inKey']

            video_info = self._download_json(
                f'https://global.apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{infra_video_id}',
                video_id, note='Downloading VOD JSON', query={
                    'key': in_key,
                    'sid': traverse_obj(post, ('extension', 'video', 'serviceId')) or '2070',
                    'pid': str(uuid.uuid4()),
                    'nonce': int(time.time() * 1000),
                    'devt': 'html5_pc',
                    'prv': 'Y' if post.get('membershipOnly') else 'N',
                    'aup': 'N',
                    'stpb': 'N',
                    'cpl': 'en',
                    'env': 'prod',
                    'lc': 'en',
                    'adi': '[{"adSystem":"null"}]',
                    'adu': '/',
                })

            formats = self._get_formats(video_info, video_id)
            has_drm = traverse_obj(video_info, ('meta', 'provider', 'name', {str.lower})) == 'drm'
            if has_drm and formats:
                self.report_warning(
                    'Requested content is DRM-protected, only a 30-second preview is available', video_id)
            elif has_drm and not formats:
                self.report_drm(video_id)

        return {
            'id': video_id,
            'channel': channel,
            'channel_url': f'https://weverse.io/{channel}',
            'formats': formats,
            'availability': availability,
            'live_status': live_status,
            **self._parse_post_meta(post),
            **NaverBaseIE.process_subtitles(video_info, self._get_subs),
        }


class WeverseMediaIE(WeverseBaseIE):
    _VALID_URL = r'https?://(?:www\.|m\.)?weverse.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)'
    _TESTS = [{
        'url': 'https://weverse.io/billlie/media/4-116372884',
        'md5': '8efc9cfd61b2f25209eb1a5326314d28',
        'info_dict': {
            'id': 'e-C9wLSQs6o',
            'ext': 'mp4',
            'title': 'Billlie | \'EUNOIA\' Performance Video (heartbeat ver.)',
            'description': 'md5:6181caaf2a2397bca913ffe368c104e5',
            'channel': 'Billlie',
            'channel_id': 'UCyc9sUCxELTDK9vELO5Fzeg',
            'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg',
            'uploader': 'Billlie',
            'uploader_id': '@Billlie',
            'uploader_url': 'http://www.youtube.com/@Billlie',
            'upload_date': '20230403',
            'duration': 211,
            'age_limit': 0,
            'playable_in_embed': True,
            'live_status': 'not_live',
            'availability': 'public',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
            'channel_follower_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg',
            'categories': ['Entertainment'],
            'tags': 'count:7',
        },
    }, {
        'url': 'https://weverse.io/billlie/media/3-102914520',
        'md5': '031551fcbd716bc4f080cb6174a43d8a',
        'info_dict': {
            'id': '3-102914520',
            'ext': 'mp4',
            'title': 'From. SUHYEON🌸',
            'description': 'Billlie 멤버별 독점 영상 공개💙💜',
            'uploader': 'Billlie_official',
            'uploader_id': 'f569c6e92f7eaffef0a395037dcaa54f',
            'channel': 'billlie',
            'channel_id': '72',
            'channel_url': 'https://weverse.io/billlie',
            'creator': 'Billlie',
            'timestamp': 1662174000,
            'upload_date': '20220903',
            'release_timestamp': 1662174000,
            'release_date': '20220903',
            'duration': 17.0,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'availability': 'needs_auth',
            'live_status': 'not_live',
        },
    }]

    def _real_extract(self, url):
        channel, video_id = self._match_valid_url(url).group('artist', 'id')
        post = self._call_post_api(video_id)
        media_type = traverse_obj(post, ('extension', 'mediaInfo', 'mediaType', {str.lower}))
        youtube_id = traverse_obj(post, ('extension', 'youtube', 'youtubeVideoId', {str}))

        if media_type == 'vod':
            return self.url_result(f'https://weverse.io/{channel}/live/{video_id}', WeverseIE)
        elif media_type == 'youtube' and youtube_id:
            return self.url_result(youtube_id, YoutubeIE)
        elif media_type == 'image':
            self.raise_no_formats('No video content found in webpage', expected=True)
        elif media_type:
            raise ExtractorError(f'Unsupported media type "{media_type}"')

        self.raise_no_formats('No video content found in webpage')


class WeverseMomentIE(WeverseBaseIE):
    _VALID_URL = r'https?://(?:www\.|m\.)?weverse.io/(?P<artist>[^/?#]+)/moment/(?P<uid>[\da-f]+)/post/(?P<id>[\d-]+)'
    _TESTS = [{
        'url': 'https://weverse.io/secretnumber/moment/66a07e164b56a696ee71c99315ffe27b/post/1-117229444',
        'md5': '87733ac19a54081b7dfc2442036d282b',
        'info_dict': {
            'id': '1-117229444',
            'ext': 'mp4',
            'title': '今日もめっちゃいい天気☀️🌤️',
            'uploader': '레아',
            'uploader_id': '66a07e164b56a696ee71c99315ffe27b',
            'channel': 'secretnumber',
            'channel_id': '56',
            'creator': 'SECRET NUMBER',
            'duration': 10,
            'upload_date': '20230405',
            'timestamp': 1680653968,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'like_count': int,
            'comment_count': int,
            'availability': 'needs_auth',
        },
        'skip': 'Moment has expired',
    }]

    def _real_extract(self, url):
        channel, uploader_id, video_id = self._match_valid_url(url).group('artist', 'uid', 'id')
        post = self._call_post_api(video_id)
        api_video_id = post['extension']['moment']['video']['videoId']
        video_info = self._call_api(
            f'/cvideo/v1.0/cvideo-{api_video_id}/playInfo?videoId={api_video_id}', video_id,
            note='Downloading moment JSON')['playInfo']

        return {
            'id': video_id,
            'channel': channel,
            'uploader_id': uploader_id,
            'formats': self._get_formats(video_info, video_id),
            'availability': self._extract_availability(post),
            **traverse_obj(post, {
                'title': ((('extension', 'moment', 'body'), 'body'), {str}),
                'uploader': ('author', 'profileName', {str}),
                'creator': (('community', 'author'), 'communityName', {str}),
                'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
                'duration': ('extension', 'moment', 'video', 'uploadInfo', 'playTime', {float_or_none}),
                'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
                'thumbnail': ('extension', 'moment', 'video', 'uploadInfo', 'imageUrl', {url_or_none}),
                'like_count': ('emotionCount', {int_or_none}),
                'comment_count': ('commentCount', {int_or_none}),
            }, get_all=False),
            **NaverBaseIE.process_subtitles(video_info, self._get_subs),
        }
|
class WeverseTabBaseIE(WeverseBaseIE):
|
||||||
|
_ENDPOINT = None
|
||||||
|
_PATH = None
|
||||||
|
_QUERY = {}
|
||||||
|
_RESULT_IE = None
|
||||||
|
|
||||||
|
def _entries(self, channel_id, channel, first_page):
|
||||||
|
query = self._QUERY.copy()
|
||||||
|
|
||||||
|
for page in itertools.count(1):
|
||||||
|
posts = first_page if page == 1 else self._call_api(
|
||||||
|
update_url_query(self._ENDPOINT % channel_id, query), channel,
|
||||||
|
note=f'Downloading {self._PATH} tab page {page}')
|
||||||
|
|
||||||
|
for post in traverse_obj(posts, ('data', lambda _, v: v['postId'])):
|
||||||
|
yield self.url_result(
|
||||||
|
f'https://weverse.io/{channel}/{self._PATH}/{post["postId"]}',
|
||||||
|
self._RESULT_IE, post['postId'], **self._parse_post_meta(post),
|
||||||
|
channel=channel, channel_url=f'https://weverse.io/{channel}',
|
||||||
|
availability=self._extract_availability(post),
|
||||||
|
live_status=self._extract_live_status(post))
|
||||||
|
|
||||||
|
query['after'] = traverse_obj(posts, ('paging', 'nextParams', 'after', {str}))
|
||||||
|
if not query['after']:
|
||||||
|
break
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
channel = self._match_id(url)
|
||||||
|
channel_id = self._get_community_id(channel)
|
||||||
|
|
||||||
|
first_page = self._call_api(
|
||||||
|
update_url_query(self._ENDPOINT % channel_id, self._QUERY), channel,
|
||||||
|
note=f'Downloading {self._PATH} tab page 1')
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
self._entries(channel_id, channel, first_page), f'{channel}-{self._PATH}',
|
||||||
|
**traverse_obj(first_page, ('data', ..., {
|
||||||
|
'playlist_title': ('community', 'communityName', {str}),
|
||||||
|
'thumbnail': ('author', 'profileImageUrl', {url_or_none}),
|
||||||
|
}), get_all=False))
|
||||||
|
|
||||||
|
|
||||||
|
class WeverseLiveTabIE(WeverseTabBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.|m\.)?weverse.io/(?P<id>[^/?#]+)/live/?(?:[?#]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://weverse.io/billlie/live/',
|
||||||
|
'playlist_mincount': 55,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'billlie-live',
|
||||||
|
'title': 'Billlie',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
_ENDPOINT = '/post/v1.0/community-%s/liveTabPosts'
|
||||||
|
_PATH = 'live'
|
||||||
|
_QUERY = {'fieldSet': 'postsV1'}
|
||||||
|
_RESULT_IE = WeverseIE
|
||||||
|
|
||||||
|
|
||||||
|
class WeverseMediaTabIE(WeverseTabBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.|m\.)?weverse.io/(?P<id>[^/?#]+)/media(?:/|/all|/new)?(?:[?#]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://weverse.io/billlie/media/',
|
||||||
|
'playlist_mincount': 231,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'billlie-media',
|
||||||
|
'title': 'Billlie',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://weverse.io/lesserafim/media/all',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://weverse.io/lesserafim/media/new',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_ENDPOINT = '/media/v1.0/community-%s/more'
|
||||||
|
_PATH = 'media'
|
||||||
|
_QUERY = {'fieldSet': 'postsV1', 'filterType': 'RECENT'}
|
||||||
|
_RESULT_IE = WeverseMediaIE
|
||||||
|
|
||||||
|
|
||||||
|
class WeverseLiveIE(WeverseBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.|m\.)?weverse.io/(?P<id>[^/?#]+)/?(?:[?#]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://weverse.io/purplekiss',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3-116560493',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:모하냥🫶🏻',
|
||||||
|
'description': '내일은 금요일~><',
|
||||||
|
'uploader': '채인',
|
||||||
|
'uploader_id': '1ffb1d9d904d6b3db2783f876eb9229d',
|
||||||
|
'channel': 'purplekiss',
|
||||||
|
'channel_id': '35',
|
||||||
|
'channel_url': 'https://weverse.io/purplekiss',
|
||||||
|
'creator': 'PURPLE KISS',
|
||||||
|
'timestamp': 1680780892,
|
||||||
|
'upload_date': '20230406',
|
||||||
|
'release_timestamp': 1680780883,
|
||||||
|
'release_date': '20230406',
|
||||||
|
'thumbnail': 'https://weverse-live.pstatic.net/v1.0/live/62044/thumb',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'availability': 'needs_auth',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
},
|
||||||
|
'skip': 'Livestream has ended',
|
||||||
|
}, {
|
||||||
|
'url': 'https://weverse.io/billlie/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
channel = self._match_id(url)
|
||||||
|
channel_id = self._get_community_id(channel)
|
||||||
|
|
||||||
|
video_id = traverse_obj(
|
||||||
|
self._call_api(update_url_query(f'/post/v1.0/community-{channel_id}/liveTab', {
|
||||||
|
'debugMessage': 'true',
|
||||||
|
'fields': 'onAirLivePosts.fieldSet(postsV1).limit(10),reservedLivePosts.fieldSet(postsV1).limit(10)',
|
||||||
|
}), channel, note='Downloading live JSON'), (
|
||||||
|
('onAirLivePosts', 'reservedLivePosts'), 'data',
|
||||||
|
lambda _, v: self._extract_live_status(v) in ('is_live', 'is_upcoming'), 'postId', {str}),
|
||||||
|
get_all=False)
|
||||||
|
|
||||||
|
if not video_id:
|
||||||
|
raise UserNotLive(video_id=channel)
|
||||||
|
|
||||||
|
return self.url_result(f'https://weverse.io/{channel}/live/{video_id}', WeverseIE)
|
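
The `traverse_obj` paths used throughout these extractors combine plain keys, branching tuples of alternative paths, and `{callable}` transform steps. A minimal standalone sketch of those semantics (not part of the patch; the sample data is made up):

    from yt_dlp.utils import traverse_obj

    post = {'extension': {'video': {'serviceId': 2070}},
            'author': {'profileName': 'Leah'}}

    # plain key path
    traverse_obj(post, ('extension', 'video', 'serviceId'))  # -> 2070
    # tuple of alternative keys branches; get_all=False returns the first match
    traverse_obj(post, (('community', 'author'), 'profileName'), get_all=False)  # -> 'Leah'
    # a {callable} step applies a transform to the matched value
    traverse_obj(post, ('author', 'profileName', {str.lower}))  # -> 'leah'
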
86 yt_dlp/extractor/weyyak.py Normal file

@@ -0,0 +1,86 @@
from .common import InfoExtractor
from ..utils import (
    float_or_none,
    int_or_none,
    parse_age_limit,
    traverse_obj,
    unified_timestamp,
    url_or_none,
)


class WeyyakIE(InfoExtractor):
    _VALID_URL = r'https?://weyyak\.com/(?P<lang>\w+)/(?:player/)?(?P<type>episode|movie)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://weyyak.com/en/player/episode/1341952/Ribat-Al-Hob-Episode49',
            'md5': '0caf55c1a615531c8fe60f146ae46849',
            'info_dict': {
                'id': '1341952',
                'ext': 'mp4',
                'title': 'Ribat Al Hob',
                'duration': 2771,
                'alt_title': 'رباط الحب',
                'season': 'Season 1',
                'season_number': 1,
                'episode': 'Episode 49',
                'episode_number': 49,
                'timestamp': 1485907200,
                'upload_date': '20170201',
                'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
                'categories': ['Drama', 'Thrillers', 'Romance'],
                'tags': 'count:8',
            },
        },
        {
            'url': 'https://weyyak.com/en/movie/233255/8-Seconds',
            'md5': 'fe740ae0f63e4d1c8a7fc147a410c564',
            'info_dict': {
                'id': '233255',
                'ext': 'mp4',
                'title': '8 Seconds',
                'duration': 6490,
                'alt_title': '8 ثواني',
                'description': 'md5:45b83a155c30b49950624c7e99600b9d',
                'age_limit': 15,
                'release_year': 2015,
                'timestamp': 1683106031,
                'upload_date': '20230503',
                'thumbnail': r're:^https://content\.weyyak\.com/.+/poster-image',
                'categories': ['Drama', 'Social'],
                'cast': ['Ceylin Adiyaman', 'Esra Inal'],
            },
        },
    ]

    def _real_extract(self, url):
        video_id, lang, type_ = self._match_valid_url(url).group('id', 'lang', 'type')

        path = 'episode/' if type_ == 'episode' else 'contents/moviedetails?contentkey='
        data = self._download_json(
            f'https://msapifo-prod-me.weyyak.z5.com/v1/{lang}/{path}{video_id}', video_id)['data']
        m3u8_url = self._download_json(
            f'https://api-weyyak.akamaized.net/get_info/{data["video_id"]}',
            video_id, 'Extracting video details')['url_video']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'alt_title': ('translated_title', {str}),
                'description': ('synopsis', {str}),
                'duration': ('length', {float_or_none}),
                'age_limit': ('age_rating', {parse_age_limit}),
                'season_number': ('season_number', {int_or_none}),
                'episode_number': ('episode_number', {int_or_none}),
                'thumbnail': ('imagery', 'thumbnail', {url_or_none}),
                'categories': ('genres', ..., {str}),
                'tags': ('tags', ..., {str}),
                'cast': (('main_actor', 'main_actress'), {str}),
                'timestamp': ('insertedAt', {unified_timestamp}),
                'release_year': ('production_year', {int_or_none}),
            }),
        }
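
The Weyyak extractor resolves playback in two requests: a metadata call that yields an internal `video_id`, then a `get_info` call that returns the HLS master playlist URL. A rough standalone sketch of the same flow (endpoints and the `data`/`video_id`/`url_video` fields come from the extractor above; anything beyond those is an assumption about the API):

    import json
    import urllib.request

    def fetch_weyyak_m3u8(lang, video_id, type_='movie'):
        # first request: site metadata, carrying the internal video id
        path = 'episode/' if type_ == 'episode' else 'contents/moviedetails?contentkey='
        with urllib.request.urlopen(
                f'https://msapifo-prod-me.weyyak.z5.com/v1/{lang}/{path}{video_id}') as resp:
            data = json.load(resp)['data']
        # second request: trade the internal id for the HLS playlist URL
        with urllib.request.urlopen(
                f'https://api-weyyak.akamaized.net/get_info/{data["video_id"]}') as resp:
            return json.load(resp)['url_video']
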
yt_dlp/extractor/wrestleuniverse.py

@@ -2,6 +2,7 @@
 import binascii
 import json
 import time
+import uuid
 
 from .common import InfoExtractor
 from ..dependencies import Cryptodome
@@ -12,30 +13,95 @@
     traverse_obj,
     try_call,
     url_or_none,
+    urlencode_postdata,
 )
 
 
 class WrestleUniverseBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'wrestleuniverse'
     _VALID_URL_TMPL = r'https?://(?:www\.)?wrestle-universe\.com/(?:(?P<lang>\w{2})/)?%s/(?P<id>\w+)'
     _API_PATH = None
-    _TOKEN = None
+    _REAL_TOKEN = None
     _TOKEN_EXPIRY = None
+    _REFRESH_TOKEN = None
+    _DEVICE_ID = None
+    _LOGIN_QUERY = {'key': 'AIzaSyCaRPBsDQYVDUWWBXjsTrHESi2r_F3RAdA'}
+    _LOGIN_HEADERS = {
+        'Accept': '*/*',
+        'Content-Type': 'application/json',
+        'X-Client-Version': 'Chrome/JsCore/9.9.4/FirebaseCore-web',
+        'X-Firebase-gmpid': '1:307308870738:web:820f38fe5150c8976e338b',
+        'Referer': 'https://www.wrestle-universe.com/',
+        'Origin': 'https://www.wrestle-universe.com',
+    }
 
-    def _get_token_cookie(self):
-        if not self._TOKEN or not self._TOKEN_EXPIRY:
-            self._TOKEN = try_call(lambda: self._get_cookies('https://www.wrestle-universe.com/')['token'].value)
-            if not self._TOKEN:
+    @property
+    def _TOKEN(self):
+        if not self._REAL_TOKEN or not self._TOKEN_EXPIRY:
+            token = try_call(lambda: self._get_cookies('https://www.wrestle-universe.com/')['token'].value)
+            if not token and not self._REFRESH_TOKEN:
                 self.raise_login_required()
-            expiry = traverse_obj(jwt_decode_hs256(self._TOKEN), ('exp', {int_or_none}))
-            if not expiry:
-                raise ExtractorError('There was a problem with the token cookie')
-            self._TOKEN_EXPIRY = expiry
+            self._REAL_TOKEN = token
 
-        if self._TOKEN_EXPIRY <= int(time.time()):
-            raise ExtractorError(
-                'Expired token. Refresh your cookies in browser and try again', expected=True)
+        if not self._REAL_TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
+            if not self._REFRESH_TOKEN:
+                raise ExtractorError(
+                    'Expired token. Refresh your cookies in browser and try again', expected=True)
+            self._refresh_token()
 
-        return self._TOKEN
+        return self._REAL_TOKEN
+
+    @_TOKEN.setter
+    def _TOKEN(self, value):
+        self._REAL_TOKEN = value
+
+        expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int_or_none}))
+        if not expiry:
+            raise ExtractorError('There was a problem with the auth token')
+        self._TOKEN_EXPIRY = expiry
+
+    def _perform_login(self, username, password):
+        login = self._download_json(
+            'https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword', None,
+            'Logging in', query=self._LOGIN_QUERY, headers=self._LOGIN_HEADERS, data=json.dumps({
+                'returnSecureToken': True,
+                'email': username,
+                'password': password,
+            }, separators=(',', ':')).encode())
+        self._REFRESH_TOKEN = traverse_obj(login, ('refreshToken', {str}))
+        if not self._REFRESH_TOKEN:
+            self.report_warning('No refresh token was granted')
+        self._TOKEN = traverse_obj(login, ('idToken', {str}))
+
+    def _real_initialize(self):
+        if WrestleUniverseBaseIE._DEVICE_ID:
+            return
+
+        WrestleUniverseBaseIE._DEVICE_ID = self._configuration_arg('device_id', [None], ie_key='WrestleUniverse')[0]
+        if not WrestleUniverseBaseIE._DEVICE_ID:
+            WrestleUniverseBaseIE._DEVICE_ID = self.cache.load(self._NETRC_MACHINE, 'device_id')
+            if WrestleUniverseBaseIE._DEVICE_ID:
+                return
+            WrestleUniverseBaseIE._DEVICE_ID = str(uuid.uuid4())
+
+        self.cache.store(self._NETRC_MACHINE, 'device_id', WrestleUniverseBaseIE._DEVICE_ID)
+
+    def _refresh_token(self):
+        refresh = self._download_json(
+            'https://securetoken.googleapis.com/v1/token', None, 'Refreshing token',
+            query=self._LOGIN_QUERY, data=urlencode_postdata({
+                'grant_type': 'refresh_token',
+                'refresh_token': self._REFRESH_TOKEN,
+            }), headers={
+                **self._LOGIN_HEADERS,
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+        if traverse_obj(refresh, ('refresh_token', {str})):
+            self._REFRESH_TOKEN = refresh['refresh_token']
+        token = traverse_obj(refresh, 'access_token', 'id_token', expected_type=str)
+        if not token:
+            raise ExtractorError('No auth token returned from refresh request')
+        self._TOKEN = token
 
     def _call_api(self, video_id, param='', msg='API', auth=True, data=None, query={}, fatal=True):
         headers = {'CA-CID': ''}
@@ -43,7 +109,7 @@ def _call_api(self, video_id, param='', msg='API', auth=True, data=None, query={
             headers['Content-Type'] = 'application/json;charset=utf-8'
             data = json.dumps(data, separators=(',', ':')).encode()
         if auth:
-            headers['Authorization'] = f'Bearer {self._get_token_cookie()}'
+            headers['Authorization'] = f'Bearer {self._TOKEN}'
         return self._download_json(
             f'https://api.wrestle-universe.com/v1/{self._API_PATH}/{video_id}{param}', video_id,
             note=f'Downloading {msg} JSON', errnote=f'Failed to download {msg} JSON',
@@ -65,7 +131,7 @@ def decrypt(data):
 
         token = base64.b64encode(private_key.public_key().export_key('DER')).decode()
         api_json = self._call_api(video_id, param, msg, data={
-            # 'deviceId' (random uuid4 generated at login) is not required yet
+            'deviceId': self._DEVICE_ID,
             'token': token,
             **data,
         }, query=query, fatal=fatal)
@@ -105,7 +171,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):
             'upload_date': '20230129',
             'thumbnail': 'https://image.asset.wrestle-universe.com/8FjD67P8rZc446RBQs5RBN/8FjD67P8rZc446RBQs5RBN',
             'chapters': 'count:7',
-            'cast': 'count:18',
+            'cast': 'count:21',
         },
         'params': {
             'skip_download': 'm3u8',
@@ -169,6 +235,7 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
         'params': {
             'skip_download': 'm3u8',
         },
+        'skip': 'No longer available',
     }, {
         'note': 'unencrypted HLS',
         'url': 'https://www.wrestle-universe.com/en/lives/wUG8hP5iApC63jbtQzhVVx',
@@ -196,14 +263,17 @@ def _real_extract(self, url):
         lang, video_id = self._match_valid_url(url).group('lang', 'id')
         metadata = self._download_metadata(url, video_id, lang, 'eventFallbackData')
 
-        info = traverse_obj(metadata, {
-            'title': ('displayName', {str}),
-            'description': ('description', {str}),
-            'channel': ('labels', 'group', {str}),
-            'location': ('labels', 'venue', {str}),
-            'timestamp': ('startTime', {int_or_none}),
-            'thumbnails': (('keyVisualUrl', 'alterKeyVisualUrl', 'heroKeyVisualUrl'), {'url': {url_or_none}}),
-        })
+        info = {
+            'id': video_id,
+            **traverse_obj(metadata, {
+                'title': ('displayName', {str}),
+                'description': ('description', {str}),
+                'channel': ('labels', 'group', {str}),
+                'location': ('labels', 'venue', {str}),
+                'timestamp': ('startTime', {int_or_none}),
+                'thumbnails': (('keyVisualUrl', 'alterKeyVisualUrl', 'heroKeyVisualUrl'), {'url': {url_or_none}}),
+            }),
+        }
 
         ended_time = traverse_obj(metadata, ('endedTime', {int_or_none}))
         if info.get('timestamp') and ended_time:
@@ -211,23 +281,20 @@ def _real_extract(self, url):
 
         video_data, decrypt = self._call_encrypted_api(
             video_id, ':watchArchive', 'watch archive', data={'method': 1})
-        formats = self._get_formats(video_data, (
+        info['formats'] = self._get_formats(video_data, (
             ('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
-        for f in formats:
+        for f in info['formats']:
             # bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
             if f.get('tbr'):
                 f['tbr'] = int(f['tbr'] / 2.5)
 
         hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
-        if not hls_aes_key and traverse_obj(video_data, ('hls', 'encryptType', {int}), default=0) > 0:
-            self.report_warning('HLS AES-128 key was not found in API response')
-
-        return {
-            'id': video_id,
-            'formats': formats,
-            'hls_aes': {
-                'key': hls_aes_key,
-                'iv': traverse_obj(video_data, ('hls', 'iv', {decrypt})),
-            },
-            **info,
-        }
+        if hls_aes_key:
+            info['hls_aes'] = {
+                'key': hls_aes_key,
+                'iv': traverse_obj(video_data, ('hls', 'iv', {decrypt})),
+            }
+        elif traverse_obj(video_data, ('hls', 'encryptType', {int})):
+            self.report_warning('HLS AES-128 key was not found in API response')
+
+        return info
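
The login and refresh flow added above is plain Firebase Auth over REST: sign in once against identitytoolkit, then trade the long-lived refresh token for fresh ID tokens via securetoken. A rough standalone sketch of the refresh call (the endpoint, `key` query parameter, and form fields are the ones in the hunk; the helper name and error handling are illustrative):

    import json
    import urllib.parse
    import urllib.request

    def refresh_firebase_token(api_key, refresh_token):
        # POST a form-encoded refresh grant to the securetoken endpoint
        body = urllib.parse.urlencode({
            'grant_type': 'refresh_token',
            'refresh_token': refresh_token,
        }).encode()
        req = urllib.request.Request(
            f'https://securetoken.googleapis.com/v1/token?key={api_key}',
            data=body, headers={'Content-Type': 'application/x-www-form-urlencoded'})
        with urllib.request.urlopen(req) as resp:
            payload = json.load(resp)
        # the reply carries a new access token and possibly a rotated refresh token
        return payload['access_token'], payload.get('refresh_token')
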
268 yt_dlp/extractor/wykop.py Normal file

@@ -0,0 +1,268 @@
import json
import urllib.error

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    format_field,
    parse_iso8601,
    traverse_obj,
    url_or_none,
)


class WykopBaseExtractor(InfoExtractor):
    def _get_token(self, force_refresh=False):
        if not force_refresh:
            maybe_cached = self.cache.load('wykop', 'bearer')
            if maybe_cached:
                return maybe_cached

        new_token = traverse_obj(
            self._do_call_api('auth', None, 'Downloading anonymous auth token', data={
                # hardcoded in frontend
                'key': 'w53947240748',
                'secret': 'd537d9e0a7adc1510842059ae5316419',
            }), ('data', 'token'))

        self.cache.store('wykop', 'bearer', new_token)
        return new_token

    def _do_call_api(self, path, video_id, note='Downloading JSON metadata', data=None, headers={}):
        if data:
            data = json.dumps({'data': data}).encode()
            headers['Content-Type'] = 'application/json'

        return self._download_json(
            f'https://wykop.pl/api/v3/{path}', video_id,
            note=note, data=data, headers=headers)

    def _call_api(self, path, video_id, note='Downloading JSON metadata'):
        token = self._get_token()
        for retrying in range(2):
            try:
                return self._do_call_api(path, video_id, note, headers={'Authorization': f'Bearer {token}'})
            except ExtractorError as e:
                if not retrying and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
                    token = self._get_token(True)
                    continue
                raise

    def _common_data_extract(self, data):
        author = traverse_obj(data, ('author', 'username'), expected_type=str)

        return {
            '_type': 'url_transparent',
            'display_id': data.get('slug'),
            'url': traverse_obj(data,
                                ('media', 'embed', 'url'),  # what gets an iframe embed
                                ('source', 'url'),  # clickable url (dig only)
                                expected_type=url_or_none),
            'thumbnail': traverse_obj(
                data, ('media', 'photo', 'url'), ('media', 'embed', 'thumbnail'), expected_type=url_or_none),
            'uploader': author,
            'uploader_id': author,
            'uploader_url': format_field(author, None, 'https://wykop.pl/ludzie/%s'),
            'timestamp': parse_iso8601(data.get('created_at'), delimiter=' '),  # time it got submitted
            'like_count': traverse_obj(data, ('votes', 'up'), expected_type=int),
            'dislike_count': traverse_obj(data, ('votes', 'down'), expected_type=int),
            'comment_count': traverse_obj(data, ('comments', 'count'), expected_type=int),
            'age_limit': 18 if data.get('adult') else 0,
            'tags': data.get('tags'),
        }


class WykopDigIE(WykopBaseExtractor):
    IE_NAME = 'wykop:dig'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/link/6912923/najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
        'info_dict': {
            'id': 'rlSTBvViflc',
            'ext': 'mp4',
            'title': 'Najbardziej zrzędliwy kot na świecie I Frozen Planet II I BBC Earth',
            'display_id': 'najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
            'description': 'md5:ac0f87dea1cdcb6b0c53f3612a095c87',
            'tags': ['zwierzaczki', 'koty', 'smiesznykotek', 'humor', 'rozrywka', 'ciekawostki'],
            'age_limit': 0,
            'timestamp': 1669154480,
            'release_timestamp': 1669194241,
            'release_date': '20221123',
            'uploader': 'starnak',
            'uploader_id': 'starnak',
            'uploader_url': 'https://wykop.pl/ludzie/starnak',
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
            'view_count': int,
            'channel': 'BBC Earth',
            'channel_id': 'UCwmZiChSryoWQCZMIQezgTg',
            'channel_url': 'https://www.youtube.com/channel/UCwmZiChSryoWQCZMIQezgTg',
            'categories': ['Pets & Animals'],
            'upload_date': '20220923',
            'duration': 191,
            'channel_follower_count': int,
            'availability': 'public',
            'live_status': 'not_live',
            'playable_in_embed': True,
        },
    }]

    @classmethod
    def suitable(cls, url):
        return cls._match_valid_url(url) and not WykopDigCommentIE.suitable(url)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        data = self._call_api(f'links/{video_id}', video_id)['data']

        return {
            **self._common_data_extract(data),
            'id': video_id,
            'title': data['title'],
            'description': data.get('description'),
            # time it got "digged" to the homepage
            'release_timestamp': parse_iso8601(data.get('published_at'), delimiter=' '),
        }


class WykopDigCommentIE(WykopBaseExtractor):
    IE_NAME = 'wykop:dig:comment'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/link/(?P<dig_id>\d+)/[^/]+/komentarz/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/link/6992589/strollowal-oszusta-przez-ponad-24-minuty-udawal-naiwniaka-i-nagral-rozmowe/komentarz/114540527/podobna-sytuacja-ponizej-ciekawa-dyskusja-z-oszustem-na-sam-koniec-sam-bylem-w-biurze-swiadkiem-podobnej-rozmowy-niemal-zakonczonej-sukcesem-bandyty-g',
        'info_dict': {
            'id': 'u6tEi2FmKZY',
            'ext': 'mp4',
            'title': 'md5:e7c741c5baa7ed6478000caf72865577',
            'display_id': 'md5:45b2d12bd0e262d09cc7cf7abc8412db',
            'description': 'md5:bcec7983429f9c0630f9deb9d3d1ba5e',
            'timestamp': 1674476945,
            'uploader': 'Bartholomew',
            'uploader_id': 'Bartholomew',
            'uploader_url': 'https://wykop.pl/ludzie/Bartholomew',
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
            'tags': [],
            'availability': 'public',
            'duration': 1838,
            'upload_date': '20230117',
            'categories': ['Entertainment'],
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'channel_follower_count': int,
            'playable_in_embed': True,
            'live_status': 'not_live',
            'age_limit': 0,
            'chapters': 'count:3',
            'channel': 'Poszukiwacze Okazji',
            'channel_id': 'UCzzvJDZThwv06dR4xmzrZBw',
            'channel_url': 'https://www.youtube.com/channel/UCzzvJDZThwv06dR4xmzrZBw',
        },
    }]

    def _real_extract(self, url):
        dig_id, comment_id = self._search_regex(self._VALID_URL, url, 'dig and comment ids', group=('dig_id', 'id'))
        data = self._call_api(f'links/{dig_id}/comments/{comment_id}', comment_id)['data']

        return {
            **self._common_data_extract(data),
            'id': comment_id,
            'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
            'description': data.get('content'),
        }


class WykopPostIE(WykopBaseExtractor):
    IE_NAME = 'wykop:post'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/wpis/68893343/kot-koty-smiesznykotek',
        'info_dict': {
            'id': 'PL8JMjiUPHUhwc9ZlKa_5IFeBwBV8Xe7jI',
            'title': 'PawelW124 - #kot #koty #smiesznykotek',
            'description': '#kot #koty #smiesznykotek',
            'display_id': 'kot-koty-smiesznykotek',
            'tags': ['kot', 'koty', 'smiesznykotek'],
            'uploader': 'PawelW124',
            'uploader_id': 'PawelW124',
            'uploader_url': 'https://wykop.pl/ludzie/PawelW124',
            'timestamp': 1668938142,
            'age_limit': 0,
            'like_count': int,
            'dislike_count': int,
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
            'comment_count': int,
            'channel': 'Revan',
            'channel_id': 'UCW9T_-uZoiI7ROARQdTDyOw',
            'channel_url': 'https://www.youtube.com/channel/UCW9T_-uZoiI7ROARQdTDyOw',
            'upload_date': '20221120',
            'modified_date': '20220814',
            'availability': 'public',
            'view_count': int,
        },
        'playlist_mincount': 15,
        'params': {
            'flat_playlist': True,
        }
    }]

    @classmethod
    def suitable(cls, url):
        return cls._match_valid_url(url) and not WykopPostCommentIE.suitable(url)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        data = self._call_api(f'entries/{video_id}', video_id)['data']

        return {
            **self._common_data_extract(data),
            'id': video_id,
            'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
            'description': data.get('content'),
        }


class WykopPostCommentIE(WykopBaseExtractor):
    IE_NAME = 'wykop:post:comment'
    _VALID_URL = r'https?://(?:www\.)?wykop\.pl/wpis/(?P<post_id>\d+)/[^/#]+#(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://wykop.pl/wpis/70084873/test-test-test#249303979',
        'info_dict': {
            'id': 'confusedquickarmyant',
            'ext': 'mp4',
            'title': 'tpap - treść komentarza',
            'display_id': 'tresc-komentarza',
            'description': 'treść komentarza',
            'uploader': 'tpap',
            'uploader_id': 'tpap',
            'uploader_url': 'https://wykop.pl/ludzie/tpap',
            'timestamp': 1675349470,
            'upload_date': '20230202',
            'tags': [],
            'duration': 2.12,
            'age_limit': 0,
            'categories': [],
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'thumbnail': r're:https?://wykop\.pl/cdn/.+',
        },
    }]

    def _real_extract(self, url):
        post_id, comment_id = self._search_regex(self._VALID_URL, url, 'post and comment ids', group=('post_id', 'id'))
        data = self._call_api(f'entries/{post_id}/comments/{comment_id}', comment_id)['data']

        return {
            **self._common_data_extract(data),
            'id': comment_id,
            'title': f"{traverse_obj(data, ('author', 'username'))} - {data.get('content') or ''}",
            'description': data.get('content'),
        }
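
The anonymous-token handling in WykopBaseExtractor is a small cache-then-retry pattern: use a cached bearer token, and on a 403 mint a fresh one and retry exactly once. A minimal sketch of the same pattern outside the extractor framework (the key/secret pair is the one hardcoded in the site frontend, as noted above; the helper names and lack of caching are illustrative):

    import json
    import urllib.error
    import urllib.request

    WYKOP_KEY, WYKOP_SECRET = 'w53947240748', 'd537d9e0a7adc1510842059ae5316419'

    def get_anon_token():
        # POST {"data": {...}} to /api/v3/auth and pull data.token from the reply
        req = urllib.request.Request(
            'https://wykop.pl/api/v3/auth',
            data=json.dumps({'data': {'key': WYKOP_KEY, 'secret': WYKOP_SECRET}}).encode(),
            headers={'Content-Type': 'application/json'})
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)['data']['token']

    def call_api(path, token):
        req = urllib.request.Request(
            f'https://wykop.pl/api/v3/{path}', headers={'Authorization': f'Bearer {token}'})
        with urllib.request.urlopen(req) as resp:
            return json.load(resp)

    def call_api_with_retry(path, token):
        try:
            return call_api(path, token)
        except urllib.error.HTTPError as e:
            if e.code != 403:
                raise
            return call_api(path, get_anon_token())  # token expired: mint a new one, retry once
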
yt_dlp/extractor/youtube.py

@@ -66,7 +66,6 @@
     variadic,
 )
 
-
 STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
 # any clients starting with _ cannot be explicitly requested by the user
 INNERTUBE_CLIENTS = {
@@ -894,9 +893,16 @@ def _extract_thumbnails(data, *path_list):
     def extract_relative_time(relative_time_text):
         """
         Extracts a relative time from string and converts to dt object
-        e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
+        e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
         """
-        mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
+
+        # XXX: this could be moved to a general function in utils.py
+        # The relative time text strings are roughly the same as what
+        # Javascript's Intl.RelativeTimeFormat function generates.
+        # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
+        mobj = re.search(
+            r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
+            relative_time_text)
         if mobj:
             start = mobj.group('start')
             if start:
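
The widened pattern above now also accepts abbreviated units. A quick standalone check of the new regex against both old- and new-style strings (the pattern is copied verbatim from the hunk; the constant name is only for the demo):

    import re

    RELATIVE_TIME_RE = (
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*'
        r'(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago')

    for text in ('streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'):
        mobj = re.search(RELATIVE_TIME_RE, text)
        # e.g. '8 yr ago' -> {'start': None, 'time': '8', 'unit': 'yr'}
        print(text, '->', mobj and mobj.groupdict())
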
@@ -1039,6 +1045,13 @@ def _extract_video(self, renderer):
                       else self._get_count({'simpleText': view_count_text}))
         view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'
 
+        channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
+                   or self._get_text(reel_header_renderer, 'channelTitleText'))
+
+        channel_handle = traverse_obj(renderer, (
+            'shortBylineText', 'runs', ..., 'navigationEndpoint',
+            (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
+            expected_type=self.handle_from_url, get_all=False)
         return {
             '_type': 'url',
             'ie_key': YoutubeIE.ie_key(),
@@ -1048,9 +1061,11 @@ def _extract_video(self, renderer):
             'description': description,
             'duration': duration,
             'channel_id': channel_id,
-            'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
-                        or self._get_text(reel_header_renderer, 'channelTitleText')),
+            'channel': channel,
             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
+            'uploader': channel,
+            'uploader_id': channel_handle,
+            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
             'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
             'timestamp': (self._parse_time_text(time_text)
                           if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
@@ -1274,6 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Philipp Hagemeister',
                 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                 'uploader_id': '@PhilippHagemeister',
+                'heatmap': 'count:100',
             }
         },
         {
@@ -1427,6 +1443,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'FlyingKitty',
                 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
                 'uploader_id': '@FlyingKitty900',
+                'comment_count': int,
             },
         },
         {
@@ -3023,17 +3040,14 @@ def _parse_sig_js(self, jscode):
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
-            r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
-            r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+            r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
-            r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+            r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-            r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-            r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')
@@ -3277,42 +3291,66 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                 chapter_time, chapter_title, duration)
             for contents in content_list)), [])
 
+    def _extract_heatmap_from_player_overlay(self, data):
+        content_list = traverse_obj(data, (
+            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
+            'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
+        return next(filter(None, (
+            traverse_obj(contents, (..., 'heatMarkerRenderer', {
+                'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
+                'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
+                'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
+            })) for contents in content_list)), None)
+
     def _extract_comment(self, comment_renderer, parent=None):
         comment_id = comment_renderer.get('commentId')
         if not comment_id:
             return
 
-        text = self._get_text(comment_renderer, 'contentText')
+        info = {
+            'id': comment_id,
+            'text': self._get_text(comment_renderer, 'contentText'),
+            'like_count': self._get_count(comment_renderer, 'voteCount'),
+            'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
+            'author': self._get_text(comment_renderer, 'authorText'),
+            'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
+            'parent': parent or 'root',
+        }
 
         # Timestamp is an estimate calculated from the current time and time_text
         time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
         timestamp = self._parse_time_text(time_text)
 
-        author = self._get_text(comment_renderer, 'authorText')
-        author_id = try_get(comment_renderer,
-                            lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
-
-        votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
-                                                       lambda x: x['likeCount']), str)) or 0
-        author_thumbnail = try_get(comment_renderer,
-                                   lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
-
-        author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
-        is_favorited = 'creatorHeart' in (try_get(
-            comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
-        return {
-            'id': comment_id,
-            'text': text,
+        info.update({
+            # FIXME: non-standard, but we need a way of showing that it is an estimate.
+            '_time_text': time_text,
             'timestamp': timestamp,
-            'time_text': time_text,
-            'like_count': votes,
-            'is_favorited': is_favorited,
-            'author': author,
-            'author_id': author_id,
-            'author_thumbnail': author_thumbnail,
-            'author_is_uploader': author_is_uploader,
-            'parent': parent or 'root'
-        }
+        })
+
+        info['author_url'] = urljoin(
+            'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
+                ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
+                expected_type=str, get_all=False))
+
+        author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
+        if author_is_uploader is not None:
+            info['author_is_uploader'] = author_is_uploader
+
+        comment_abr = traverse_obj(
+            comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict)
+        if comment_abr is not None:
+            info['is_favorited'] = 'creatorHeart' in comment_abr
+
+        comment_ab_icontype = traverse_obj(
+            comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType'))
+        if comment_ab_icontype is not None:
+            info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE')
+
+        is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
+        if is_pinned:
+            info['is_pinned'] = True
+
+        return info
 
     def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
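
The dict-spec form of `traverse_obj` used in the new heatmap method maps each output key to a sub-path (or a bare `{callable}` applied to the current object) and drops keys that resolve to None. A small standalone check against a made-up heat marker, mirroring the spec in the hunk above:

    import functools
    from yt_dlp.utils import float_or_none, traverse_obj

    marker = {'heatMarkerRenderer': {
        'timeRangeStartMillis': 0,
        'markerDurationMillis': 1000,
        'heatMarkerIntensityScoreNormalized': 0.5,
    }}

    print(traverse_obj(marker, ('heatMarkerRenderer', {
        'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
        'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
    })))
    # -> {'start_time': 0.0, 'end_time': 1.0, 'value': 0.5}
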
@ -3325,7 +3363,7 @@ def extract_header(contents):
|
||||||
expected_comment_count = self._get_count(
|
expected_comment_count = self._get_count(
|
||||||
comments_header_renderer, 'countText', 'commentsCount')
|
comments_header_renderer, 'countText', 'commentsCount')
|
||||||
|
|
||||||
if expected_comment_count:
|
if expected_comment_count is not None:
|
||||||
tracker['est_total'] = expected_comment_count
|
tracker['est_total'] = expected_comment_count
|
||||||
self.to_screen(f'Downloading ~{expected_comment_count} comments')
|
self.to_screen(f'Downloading ~{expected_comment_count} comments')
|
||||||
comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
|
comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
|
||||||
|
@ -3360,14 +3398,13 @@ def extract_thread(contents):
|
||||||
comment = self._extract_comment(comment_renderer, parent)
|
comment = self._extract_comment(comment_renderer, parent)
|
||||||
if not comment:
|
if not comment:
|
||||||
continue
|
continue
|
||||||
is_pinned = bool(traverse_obj(comment_renderer, 'pinnedCommentBadge'))
|
|
||||||
comment_id = comment['id']
|
comment_id = comment['id']
|
||||||
if is_pinned:
|
if comment.get('is_pinned'):
|
||||||
tracker['pinned_comment_ids'].add(comment_id)
|
tracker['pinned_comment_ids'].add(comment_id)
|
||||||
# Sometimes YouTube may break and give us infinite looping comments.
|
# Sometimes YouTube may break and give us infinite looping comments.
|
||||||
# See: https://github.com/yt-dlp/yt-dlp/issues/6290
|
# See: https://github.com/yt-dlp/yt-dlp/issues/6290
|
||||||
if comment_id in tracker['seen_comment_ids']:
|
if comment_id in tracker['seen_comment_ids']:
|
||||||
if comment_id in tracker['pinned_comment_ids'] and not is_pinned:
|
if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
|
||||||
# Pinned comments may appear a second time in newest first sort
|
# Pinned comments may appear a second time in newest first sort
|
||||||
# See: https://github.com/yt-dlp/yt-dlp/issues/6712
|
# See: https://github.com/yt-dlp/yt-dlp/issues/6712
|
||||||
continue
|
continue
|
||||||
|
@ -3396,7 +3433,7 @@ def extract_thread(contents):
|
||||||
if not tracker:
|
if not tracker:
|
||||||
tracker = dict(
|
tracker = dict(
|
||||||
running_total=0,
|
running_total=0,
|
||||||
est_total=0,
|
est_total=None,
|
||||||
current_page_thread=0,
|
current_page_thread=0,
|
||||||
total_parent_comments=0,
|
total_parent_comments=0,
|
||||||
total_reply_comments=0,
|
total_reply_comments=0,
|
||||||
|
@ -3429,11 +3466,13 @@ def extract_thread(contents):
|
||||||
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
|
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
|
||||||
is_forced_continuation = True
|
is_forced_continuation = True
|
||||||
|
|
||||||
|
continuation_items_path = (
|
||||||
|
'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
|
||||||
for page_num in itertools.count(0):
|
for page_num in itertools.count(0):
|
||||||
if not continuation:
|
if not continuation:
|
||||||
break
|
break
|
||||||
headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
|
headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
|
||||||
comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
|
comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
|
||||||
if page_num == 0:
|
if page_num == 0:
|
||||||
if is_first_continuation:
|
if is_first_continuation:
|
||||||
note_prefix = 'Downloading comment section API JSON'
|
note_prefix = 'Downloading comment section API JSON'
|
||||||
|
@ -3444,11 +3483,18 @@ def extract_thread(contents):
|
||||||
note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
|
note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
|
||||||
' ' if parent else '', ' replies' if parent else '',
|
' ' if parent else '', ' replies' if parent else '',
|
||||||
page_num, comment_prog_str)
|
page_num, comment_prog_str)
|
||||||
|
|
||||||
|
# Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
|
||||||
|
# Ignore check if YouTube says the comment count is 0.
|
||||||
|
check_get_keys = None
|
||||||
|
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
|
||||||
|
check_get_keys = [[*continuation_items_path, ..., (
|
||||||
|
'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
|
||||||
try:
|
try:
|
||||||
response = self._extract_response(
|
response = self._extract_response(
|
||||||
item_id=None, query=continuation,
|
item_id=None, query=continuation,
|
||||||
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
||||||
check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
|
check_get_keys=check_get_keys)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
# Ignore incomplete data error for replies if retries didn't work.
|
# Ignore incomplete data error for replies if retries didn't work.
|
||||||
# This is to allow any other parent comments and comment threads to be downloaded.
|
# This is to allow any other parent comments and comment threads to be downloaded.
|
||||||
|
@ -3460,15 +3506,8 @@ def extract_thread(contents):
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
is_forced_continuation = False
|
is_forced_continuation = False
|
||||||
continuation_contents = traverse_obj(
|
|
||||||
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
|
|
||||||
|
|
||||||
continuation = None
|
continuation = None
|
||||||
for continuation_section in continuation_contents:
|
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
|
||||||
continuation_items = traverse_obj(
|
|
||||||
continuation_section,
|
|
||||||
(('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
|
|
||||||
get_all=False, expected_type=list) or []
|
|
||||||
if is_first_continuation:
|
if is_first_continuation:
|
||||||
continuation = extract_header(continuation_items)
|
continuation = extract_header(continuation_items)
|
||||||
is_first_continuation = False
|
is_first_continuation = False
|
||||||
|
@ -4349,6 +4388,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
||||||
or self._extract_chapters_from_description(video_description, duration)
|
or self._extract_chapters_from_description(video_description, duration)
|
||||||
or None)
|
or None)
|
||||||
|
|
||||||
|
info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
|
||||||
|
|
||||||
contents = traverse_obj(
|
contents = traverse_obj(
|
||||||
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
|
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
|
||||||
expected_type=list, default=[])
|
expected_type=list, default=[])
|
||||||
|
@ -4611,8 +4652,11 @@ def _grid_entries(self, grid_renderer):
|
||||||
def _music_reponsive_list_entry(self, renderer):
|
def _music_reponsive_list_entry(self, renderer):
|
||||||
video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
|
video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
|
||||||
if video_id:
|
if video_id:
|
||||||
|
title = traverse_obj(renderer, (
|
||||||
|
'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
|
||||||
|
'text', 'runs', 0, 'text'))
|
||||||
return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
|
return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
|
||||||
ie=YoutubeIE.ie_key(), video_id=video_id)
|
ie=YoutubeIE.ie_key(), video_id=video_id, title=title)
|
||||||
playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
|
playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
|
||||||
if playlist_id:
|
if playlist_id:
|
||||||
video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
|
video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
|
||||||
|
@ -4671,11 +4715,19 @@ def _playlist_entries(self, video_list_renderer):
|
||||||
|
|
||||||
def _rich_entries(self, rich_grid_renderer):
|
def _rich_entries(self, rich_grid_renderer):
|
||||||
renderer = traverse_obj(
|
renderer = traverse_obj(
|
||||||
rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
|
rich_grid_renderer,
|
||||||
|
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
|
||||||
video_id = renderer.get('videoId')
|
video_id = renderer.get('videoId')
|
||||||
if not video_id:
|
if video_id:
|
||||||
|
yield self._extract_video(renderer)
|
||||||
|
return
|
||||||
|
playlist_id = renderer.get('playlistId')
|
||||||
|
if playlist_id:
|
||||||
|
yield self.url_result(
|
||||||
|
f'https://www.youtube.com/playlist?list={playlist_id}',
|
||||||
|
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||||
|
video_title=self._get_text(renderer, 'title'))
|
||||||
return
|
return
|
||||||
yield self._extract_video(renderer)
|
|
||||||
|
|
||||||
def _video_entry(self, video_renderer):
|
def _video_entry(self, video_renderer):
|
||||||
video_id = video_renderer.get('videoId')
|
video_id = video_renderer.get('videoId')
|
||||||
|
@@ -4904,7 +4956,7 @@ def _extract_metadata_from_tabs(self, item_id, data):
        metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
        if metadata_renderer:
            channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                      ('channelUrl', {self.ucid_from_url}))
            info.update({
                'channel': metadata_renderer.get('title'),
                'channel_id': channel_id,
@@ -5861,7 +5913,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'uploader_id': '@colethedj1894',
            'uploader': 'colethedj',
        },
+        'playlist': [{
+            'info_dict': {
+                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+                'id': 'BaW_jenozKc',
+                '_type': 'url',
+                'ie_key': 'Youtube',
+                'duration': 10,
+                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+                'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+                'view_count': int,
+                'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
+                'channel': 'Philipp Hagemeister',
+                'uploader_id': '@PhilippHagemeister',
+                'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+                'uploader': 'Philipp Hagemeister',
+            }
+        }],
        'playlist_count': 1,
+        'params': {'extract_flat': True},
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
@@ -6162,6 +6232,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                'channel_url': str,
                'concurrent_view_count': int,
                'channel': str,
+                'uploader': str,
+                'uploader_url': str,
+                'uploader_id': str
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -6217,6 +6290,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
            'uploader': '3Blue1Brown',
        },
        'playlist_count': 0,
+    }, {
+        # Podcasts tab, with rich entry playlistRenderers
+        'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
+        'info_dict': {
+            'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
+            'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
+            'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
+            'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
+            'title': '99 Percent Invisible - Podcasts',
+            'uploader': '99 Percent Invisible',
+            'channel_follower_count': int,
+            'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
+            'tags': [],
+            'channel': '99 Percent Invisible',
+            'uploader_id': '@99percentinvisiblepodcast',
+        },
+        'playlist_count': 1,
+    }, {
+        # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
+        'url': 'https://www.youtube.com/@AHimitsu/releases',
+        'info_dict': {
+            'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
+            'channel': 'A Himitsu',
+            'uploader_url': 'https://www.youtube.com/@AHimitsu',
+            'title': 'A Himitsu - Releases',
+            'uploader_id': '@AHimitsu',
+            'uploader': 'A Himitsu',
+            'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
+            'tags': 'count:16',
+            'description': 'I make music',
+            'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
+            'channel_follower_count': int,
+        },
+        'playlist_mincount': 10,
    }]

    @classmethod
yt_dlp/extractor/zingmp3.py
@@ -1,16 +1,11 @@
-import functools
 import hashlib
 import hmac
+import itertools
 import json
 import urllib.parse

 from .common import InfoExtractor
-from ..utils import (
-    OnDemandPagedList,
-    int_or_none,
-    traverse_obj,
-    urljoin,
-)
+from ..utils import int_or_none, traverse_obj, try_call, urljoin


 class ZingMp3BaseIE(InfoExtractor):
@@ -37,6 +32,7 @@ class ZingMp3BaseIE(InfoExtractor):
        'info-artist': '/api/v2/page/get/artist',
        'user-list-song': '/api/v2/song/get/list',
        'user-list-video': '/api/v2/video/get/list',
+        'hub': '/api/v2/page/get/hub-detail',
    }

    def _api_url(self, url_type, params):
@@ -46,9 +42,9 @@ def _api_url(self, url_type, params):
            ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest()
        data = {
            **params,
-            'apiKey': '88265e23d4284f25963e6eedac8fbfa3',
-            'sig': hmac.new(
-                b'2aa2d1c561e809b267f3638c4a307aab', f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(),
+            'apiKey': 'X5BM3w8N7MKozC0B85o4KMlzLZKhV00y',
+            'sig': hmac.new(b'acOrvUS15XRW2o9JksiK1KgQ6Vbds8ZW',
+                            f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(),
        }
        return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}'

@@ -67,6 +63,19 @@ def _parse_items(self, items):
        for url in traverse_obj(items, (..., 'link')) or []:
            yield self.url_result(urljoin(self._DOMAIN, url))

+    def _fetch_page(self, id_, url_type, page):
+        raise NotImplementedError('This method must be implemented by subclasses')
+
+    def _paged_list(self, _id, url_type):
+        count = 0
+        for page in itertools.count(1):
+            data = self._fetch_page(_id, url_type, page)
+            entries = list(self._parse_items(data.get('items')))
+            count += len(entries)
+            yield from entries
+            if not data.get('hasMore') or try_call(lambda: count > data['total']):
+                break
+

 class ZingMp3IE(ZingMp3BaseIE):
    _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed'
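The generator above replaces the old OnDemandPagedList plumbing: pages are fetched lazily and iteration stops when the API reports no further data. A minimal standalone sketch of the same termination logic, with a stubbed fetcher standing in for the real _fetch_page API call (the stub's data is invented for illustration):

    import itertools

    def fetch_page_stub(page):
        # Hypothetical stand-in for ZingMp3BaseIE._fetch_page: three pages of items
        pages = {1: ['a', 'b'], 2: ['c', 'd'], 3: ['e']}
        return {'items': pages.get(page, []), 'hasMore': page < 3, 'total': 5}

    def paged_list():
        count = 0
        for page in itertools.count(1):
            data = fetch_page_stub(page)
            entries = data['items']
            count += len(entries)
            yield from entries
            # Stop when the API signals the end, or the running count passes the advertised total
            if not data['hasMore'] or count > data['total']:
                break

    print(list(paged_list()))  # ['a', 'b', 'c', 'd', 'e']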
@@ -166,8 +175,11 @@ def _real_extract(self, url):
                    'height': int_or_none(res),
                })

-        if not formats and item.get('msg') == 'Sorry, this content is not available in your country.':
-            self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+        if not formats:
+            if item.get('msg') == 'Sorry, this content is not available in your country.':
+                self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
+            else:
+                self.raise_no_formats('The song is only for VIP accounts.')

        lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file')

@@ -200,7 +212,7 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
            'id': 'ZWZAEZZD',
            'title': 'Những Bài Hát Hay Nhất Của Mr. Siro',
        },
-        'playlist_mincount': 49,
+        'playlist_mincount': 20,
    }, {
        'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
        'only_matching': True,
@@ -305,22 +317,20 @@ class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE):
            'id': 'IWZ9Z086',
            'title': 'the-loai-video_Khong-Loi',
        },
-        'playlist_mincount': 10,
+        'playlist_mincount': 1,
    }]

    def _fetch_page(self, song_id, url_type, page):
-        return self._parse_items(self._call_api(url_type, {
+        return self._call_api(url_type, {
            'id': song_id,
            'type': 'genre',
-            'page': page + 1,
+            'page': page,
            'count': self._PER_PAGE
-        }).get('items'))
+        })

    def _real_extract(self, url):
        song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type')
-        return self.playlist_result(
-            OnDemandPagedList(functools.partial(self._fetch_page, song_id, url_type), self._PER_PAGE),
-            song_id, f'{url_type}_{regions}')
+        return self.playlist_result(self._paged_list(song_id, url_type), song_id, f'{url_type}_{regions}')


 class ZingMp3UserIE(ZingMp3BaseIE):
@@ -331,7 +341,7 @@ class ZingMp3UserIE(ZingMp3BaseIE):
        'info_dict': {
            'id': 'IWZ98609',
            'title': 'Mr. Siro - bai-hat',
-            'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+            'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
        },
        'playlist_mincount': 91,
    }, {
@@ -339,7 +349,7 @@ class ZingMp3UserIE(ZingMp3BaseIE):
        'info_dict': {
            'id': 'IWZ98609',
            'title': 'Mr. Siro - album',
-            'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+            'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
        },
        'playlist_mincount': 3,
    }, {
@@ -347,7 +357,7 @@ class ZingMp3UserIE(ZingMp3BaseIE):
        'info_dict': {
            'id': 'IWZ98609',
            'title': 'Mr. Siro - single',
-            'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+            'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
        },
        'playlist_mincount': 20,
    }, {
@@ -355,19 +365,19 @@ class ZingMp3UserIE(ZingMp3BaseIE):
        'info_dict': {
            'id': 'IWZ98609',
            'title': 'Mr. Siro - video',
-            'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5',
+            'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
        },
        'playlist_mincount': 15,
    }]

    def _fetch_page(self, user_id, url_type, page):
        url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video'
-        return self._parse_items(self._call_api(url_type, {
+        return self._call_api(url_type, {
            'id': user_id,
            'type': 'artist',
-            'page': page + 1,
+            'page': page,
            'count': self._PER_PAGE
-        }, query={'sort': 'new', 'sectionId': 'aSong'}).get('items'))
+        })

    def _real_extract(self, url):
        user_alias, url_type = self._match_valid_url(url).group('user', 'type')
@@ -376,10 +386,41 @@ def _real_extract(self, url):

        user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias})
        if url_type in ('bai-hat', 'video'):
-            entries = OnDemandPagedList(
-                functools.partial(self._fetch_page, user_info['id'], url_type), self._PER_PAGE)
+            entries = self._paged_list(user_info['id'], url_type)
        else:
            entries = self._parse_items(traverse_obj(user_info, (
-                'sections', lambda _, v: v['link'] == f'/{user_alias}/{url_type}', 'items', ...)))
+                'sections',
+                lambda _, v: v['sectionId'] == 'aAlbum' if url_type == 'album' else v['sectionId'] == 'aSingle',
+                'items', ...)))
        return self.playlist_result(
            entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography'))
+
+
+class ZingMp3HubIE(ZingMp3BaseIE):
+    IE_NAME = 'zingmp3:hub'
+    _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>hub)/(?P<regions>[^/]+)/(?P<id>[^\.]+)'
+    _TESTS = [{
+        'url': 'https://zingmp3.vn/hub/Nhac-Moi/IWZ9Z0CA.html',
+        'info_dict': {
+            'id': 'IWZ9Z0CA',
+            'title': 'Nhạc Mới',
+            'description': 'md5:1cc31b68a6f746427b07b2756c22a558',
+        },
+        'playlist_mincount': 20,
+    }, {
+        'url': 'https://zingmp3.vn/hub/Nhac-Viet/IWZ9Z087.html',
+        'info_dict': {
+            'id': 'IWZ9Z087',
+            'title': 'Nhạc Việt',
+            'description': 'md5:acc976c8bdde64d5c6ee4a92c39f7a77',
+        },
+        'playlist_mincount': 30,
+    }]
+
+    def _real_extract(self, url):
+        song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type')
+        hub_detail = self._call_api(url_type, {'id': song_id})
+        entries = self._parse_items(traverse_obj(hub_detail, (
+            'sections', lambda _, v: v['sectionId'] == 'hub', 'items', ...)))
+        return self.playlist_result(
+            entries, song_id, hub_detail.get('title'), hub_detail.get('description'))
yt_dlp/jsinterp.py
@@ -20,7 +20,12 @@

 def _js_bit_op(op):
    def zeroise(x):
-        return 0 if x in (None, JS_Undefined) else x
+        if x in (None, JS_Undefined):
+            return 0
+        with contextlib.suppress(TypeError):
+            if math.isnan(x):  # NB: NaN cannot be checked by membership
+                return 0
+        return x

    def wrapped(a, b):
        return op(zeroise(a), zeroise(b)) & 0xffffffff
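The rewritten zeroise mirrors how JavaScript bitwise operators coerce undefined, null and NaN to 0; NaN compares unequal even to itself, so the old membership test let it slip through and break the integer operation. A standalone sketch of the new behaviour (JS_Undefined here is a stand-in sentinel, as in yt_dlp.jsinterp):

    import contextlib
    import math

    JS_Undefined = object()  # stand-in sentinel

    def zeroise(x):
        if x in (None, JS_Undefined):
            return 0
        with contextlib.suppress(TypeError):  # math.isnan rejects non-numbers
            if math.isnan(x):  # NaN cannot be caught by the membership test above
                return 0
        return x

    print(zeroise(float('nan')) | 1)  # 1, matching JS `NaN | 1`
    print(zeroise(None) | 1)          # 1, matching JS `null | 1`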
@@ -243,7 +248,7 @@ def _separate(expr, delim=',', max_split=None):
            return
        counters = {k: 0 for k in _MATCHING_PARENS.values()}
        start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
-        in_quote, escaping, after_op, in_regex_char_group, in_unary_op = None, False, True, False, False
+        in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
        for idx, char in enumerate(expr):
            if not in_quote and char in _MATCHING_PARENS:
                counters[_MATCHING_PARENS[char]] += 1
@@ -347,8 +352,10 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
            inner, outer = self._separate(expr, expr[0], 1)
            if expr[0] == '/':
                flags, outer = self._regex_flags(outer)
+                # We don't support regex methods yet, so no point compiling it
+                inner = f'{inner}/{flags}'
                # Avoid https://github.com/python/cpython/issues/74534
-                inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
+                # inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
            else:
                inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
            if not outer:
@@ -438,7 +445,7 @@ def dict_item(key, val):
                err = e

            pending = (None, False)
-            m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
+            m = re.match(fr'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{', expr)
            if m:
                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
                if err:
yt_dlp/options.py
@@ -34,6 +34,7 @@
    join_nonempty,
    orderedSet_from_options,
    remove_end,
+    variadic,
    write_string,
 )
 from .version import CHANNEL, __version__
@@ -250,7 +251,7 @@ def _dict_from_options_callback(
        if multiple_args:
            val = [val, *value[1:]]
    elif default_key is not None:
-        keys, val = [default_key], value
+        keys, val = variadic(default_key), value
    else:
        raise optparse.OptionValueError(
            f'wrong {opt_str} formatting; it should be {option.metavar}, not "{value}"')
@@ -323,7 +324,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
        help='Print program version and exit')
    general.add_option(
        '-U', '--update',
-        action='store_true', dest='update_self',
+        action='store_const', dest='update_self', const=CHANNEL,
        help=format_field(
            is_non_updateable(), None, 'Check if updates are available. %s',
            default=f'Update this program to the latest {CHANNEL} version'))
@@ -335,9 +336,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
        '--update-to',
        action='store', dest='update_self', metavar='[CHANNEL]@[TAG]',
        help=(
-            'Upgrade/downgrade to a specific version. CHANNEL and TAG defaults to '
-            f'"{CHANNEL}" and "latest" respectively if omitted; See "UPDATE" for details. '
-            f'Supported channels: {", ".join(UPDATE_SOURCES)}'))
+            'Upgrade/downgrade to a specific version. CHANNEL can be a repository as well. '
+            f'CHANNEL and TAG default to "{CHANNEL.partition("@")[0]}" and "latest" respectively if omitted; '
+            f'See "UPDATE" for details. Supported channels: {", ".join(UPDATE_SOURCES)}'))
    general.add_option(
        '-i', '--ignore-errors',
        action='store_true', dest='ignoreerrors',
@@ -411,7 +412,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
    general.add_option(
        '--no-flat-playlist',
        action='store_false', dest='extract_flat',
-        help='Extract the videos of a playlist')
+        help='Fully extract the videos of a playlist (default)')
    general.add_option(
        '--live-from-start',
        action='store_true', dest='live_from_start',
@@ -447,8 +448,25 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
        help='Do not mark videos watched (default)')
    general.add_option(
        '--no-colors', '--no-colours',
-        action='store_true', dest='no_color', default=False,
-        help='Do not emit color codes in output (Alias: --no-colours)')
+        action='store_const', dest='color', const={
+            'stdout': 'no_color',
+            'stderr': 'no_color',
+        },
+        help=optparse.SUPPRESS_HELP)
+    general.add_option(
+        '--color',
+        dest='color', metavar='[STREAM:]POLICY', default={}, type='str',
+        action='callback', callback=_dict_from_options_callback,
+        callback_kwargs={
+            'allowed_keys': 'stdout|stderr',
+            'default_key': ['stdout', 'stderr'],
+            'process': str.strip,
+        }, help=(
+            'Whether to emit color codes in output, optionally prefixed by '
+            'the STREAM (stdout or stderr) to apply the setting to. '
+            'Can be one of "always", "auto" (default), "never", or '
+            '"no_color" (use non color terminal sequences). '
+            'Can be used multiple times'))
    general.add_option(
        '--compat-options',
        metavar='OPTS', dest='compat_opts', default=set(), type='str',
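The new --color option accumulates [STREAM:]POLICY values into a dict keyed by stream; a value without a STREAM prefix applies to both stdout and stderr, and later values override earlier ones. A rough sketch of that accumulation outside optparse (parse_color_arg is illustrative, not the actual _dict_from_options_callback signature):

    def parse_color_arg(current, value, allowed_keys=('stdout', 'stderr')):
        key, sep, policy = value.partition(':')
        if sep and key.strip() in allowed_keys:
            current[key.strip()] = policy.strip()
        else:
            # No recognized STREAM prefix: the whole value is the policy for both streams
            current.update(dict.fromkeys(allowed_keys, value.strip()))
        return current

    opts = {}
    parse_color_arg(opts, 'no_color')      # applies to both streams
    parse_color_arg(opts, 'stderr:never')  # overrides stderr only
    print(opts)  # {'stdout': 'no_color', 'stderr': 'never'}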
@@ -528,11 +546,11 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
        help=optparse.SUPPRESS_HELP)
    geo.add_option(
        '--xff', metavar='VALUE',
-        dest='geo_bypass', default="default",
+        dest='geo_bypass', default='default',
        help=(
            'How to fake X-Forwarded-For HTTP header to try bypassing geographic restriction. '
-            'One of "default" (Only when known to be useful), "never", '
-            'a two-letter ISO 3166-2 country code, or an IP block in CIDR notation'))
+            'One of "default" (only when known to be useful), "never", '
+            'an IP block in CIDR notation, or a two-letter ISO 3166-2 country code'))
    geo.add_option(
        '--geo-bypass',
        action='store_const', dest='geo_bypass', const='default',
@@ -624,7 +642,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
            'that contains the phrase "cats & dogs" (caseless). '
            'Use "--match-filter -" to interactively ask whether to download each video'))
    selection.add_option(
-        '--no-match-filter',
+        '--no-match-filters',
        dest='match_filter', action='store_const', const=None,
        help='Do not use any --match-filter (default)')
    selection.add_option(
yt_dlp/update.py
@@ -16,6 +16,7 @@
    Popen,
    cached_method,
    deprecation_warning,
+    network_exceptions,
    remove_end,
    remove_start,
    sanitized_Request,
@@ -128,27 +129,36 @@ def __init__(self, ydl, target=None):
        self.ydl = ydl

        self.target_channel, sep, self.target_tag = (target or CHANNEL).rpartition('@')
-        if not sep and self.target_tag in UPDATE_SOURCES:  # stable => stable@latest
-            self.target_channel, self.target_tag = self.target_tag, None
+        # stable => stable@latest
+        if not sep and ('/' in self.target_tag or self.target_tag in UPDATE_SOURCES):
+            self.target_channel = self.target_tag
+            self.target_tag = None
        elif not self.target_channel:
-            self.target_channel = CHANNEL
+            self.target_channel = CHANNEL.partition('@')[0]

        if not self.target_tag:
-            self.target_tag, self._exact = 'latest', False
+            self.target_tag = 'latest'
+            self._exact = False
        elif self.target_tag != 'latest':
            self.target_tag = f'tags/{self.target_tag}'

-    @property
-    def _target_repo(self):
-        try:
-            return UPDATE_SOURCES[self.target_channel]
-        except KeyError:
-            return self._report_error(
-                f'Invalid update channel {self.target_channel!r} requested. '
-                f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)
+        if '/' in self.target_channel:
+            self._target_repo = self.target_channel
+            if self.target_channel not in (CHANNEL, *UPDATE_SOURCES.values()):
+                self.ydl.report_warning(
+                    f'You are switching to an {self.ydl._format_err("unofficial", "red")} executable '
+                    f'from {self.ydl._format_err(self._target_repo, self.ydl.Styles.EMPHASIS)}. '
+                    f'Run {self.ydl._format_err("at your own risk", "light red")}')
+                self.restart = self._blocked_restart
+        else:
+            self._target_repo = UPDATE_SOURCES.get(self.target_channel)
+            if not self._target_repo:
+                self._report_error(
+                    f'Invalid update channel {self.target_channel!r} requested. '
+                    f'Valid channels are {", ".join(UPDATE_SOURCES)}', True)

    def _version_compare(self, a, b, channel=CHANNEL):
-        if channel != self.target_channel:
+        if self._exact and channel != self.target_channel:
            return False

        if _VERSION_RE.fullmatch(f'{a}.{b}'):
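rpartition('@') splits the update target from the right, so bare channels, CHANNEL@TAG, @TAG and owner/repo@TAG forms all parse with one call, as the __init__ above relies on. A simplified sketch of the resolution, with a cut-down UPDATE_SOURCES table (the real mapping lives in yt_dlp.update):

    UPDATE_SOURCES = {'stable': 'yt-dlp/yt-dlp', 'nightly': 'yt-dlp/yt-dlp-nightly-builds'}
    DEFAULT_CHANNEL = 'stable'  # stand-in for CHANNEL

    def parse_target(target):
        channel, sep, tag = target.rpartition('@')
        # A bare channel or repo ("nightly", "owner/repo") lands in `tag`: swap it over
        if not sep and ('/' in tag or tag in UPDATE_SOURCES):
            channel, tag = tag, ''
        return channel or DEFAULT_CHANNEL, tag or 'latest'

    for target in ('nightly', 'stable@2023.03.04', '@2023.03.04', 'owner/repo@tag'):
        print(target, '->', parse_target(target))
    # nightly -> ('nightly', 'latest')
    # stable@2023.03.04 -> ('stable', '2023.03.04')
    # @2023.03.04 -> ('stable', '2023.03.04')
    # owner/repo@tag -> ('owner/repo', 'tag')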
@@ -258,8 +268,8 @@ def check_update(self):
            self.ydl.to_screen((
                f'Available version: {self._label(self.target_channel, self.latest_version)}, ' if self.target_tag == 'latest' else ''
            ) + f'Current version: {self._label(CHANNEL, self.current_version)}')
-        except Exception:
-            return self._report_network_error('obtain version info', delim='; Please try again later or')
+        except network_exceptions as e:
+            return self._report_network_error(f'obtain version info ({e})', delim='; Please try again later or')

        if not is_non_updateable():
            self.ydl.to_screen(f'Current Build Hash: {_sha256_file(self.filename)}')
@@ -303,7 +313,7 @@ def update(self):

        try:
            newcontent = self._download(self.release_name, self._tag)
-        except Exception as e:
+        except network_exceptions as e:
            if isinstance(e, urllib.error.HTTPError) and e.code == 404:
                return self._report_error(
                    f'The requested tag {self._label(self.target_channel, self.target_tag)} does not exist', True)
@@ -371,6 +381,12 @@ def restart(self):
        _, _, returncode = Popen.run(self.cmd)
        return returncode

+    def _blocked_restart(self):
+        self._report_error(
+            'Automatically restarting into custom builds is disabled for security reasons. '
+            'Restart yt-dlp to use the updated version', expected=True)
+        return self.ydl._download_retcode
+

 def run_update(ydl):
    """Update the program file with the latest version from the repository

yt_dlp/utils/__init__.py (new file, 14 lines)
@@ -0,0 +1,14 @@
+import warnings
+
+from ..compat.compat_utils import passthrough_module
+
+# XXX: Implement this the same way as other DeprecationWarnings without circular import
+passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
+    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
+del passthrough_module
+
+# isort: off
+from .traversal import *
+from ._utils import *
+from ._utils import _configuration_args, _get_exe_version_output
+from ._deprecated import *
yt_dlp/utils/_deprecated.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+"""Deprecated - New code should avoid these"""
+
+from ._utils import preferredencoding
+
+
+def encodeFilename(s, for_subprocess=False):
+    assert isinstance(s, str)
+    return s
+
+
+def decodeFilename(b, for_subprocess=False):
+    return b
+
+
+def decodeArgument(b):
+    return b
+
+
+def decodeOption(optval):
+    if optval is None:
+        return optval
+    if isinstance(optval, bytes):
+        optval = optval.decode(preferredencoding())
+
+    assert isinstance(optval, str)
+    return optval
+
+
+def error_to_compat_str(err):
+    return str(err)
yt_dlp/utils/_legacy.py (new file, 176 lines)
@@ -0,0 +1,176 @@
+"""No longer used and new code should not use. Exists only for API compat."""
+
+import platform
+import struct
+import sys
+import urllib.parse
+import zlib
+
+from ._utils import decode_base_n, preferredencoding
+from .traversal import traverse_obj
+from ..dependencies import certifi, websockets
+
+# isort: split
+from ..cookies import YoutubeDLCookieJar  # noqa: F401
+
+has_certifi = bool(certifi)
+has_websockets = bool(websockets)
+
+
+def load_plugins(name, suffix, namespace):
+    from ..plugins import load_plugins
+    ret = load_plugins(name, suffix)
+    namespace.update(ret)
+    return ret
+
+
+def traverse_dict(dictn, keys, casesense=True):
+    return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
+
+
+def decode_base(value, digits):
+    return decode_base_n(value, table=digits)
+
+
+def platform_name():
+    """ Returns the platform name as a str """
+    return platform.platform()
+
+
+def get_subprocess_encoding():
+    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+        # For subprocess calls, encode with locale encoding
+        # Refer to http://stackoverflow.com/a/9951851/35070
+        encoding = preferredencoding()
+    else:
+        encoding = sys.getfilesystemencoding()
+    if encoding is None:
+        encoding = 'utf-8'
+    return encoding
+
+
+# UNUSED
+# Based on png2str() written by @gdkchan and improved by @yokrysty
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
+def decode_png(png_data):
+    # Reference: https://www.w3.org/TR/PNG/
+    header = png_data[8:]
+
+    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
+        raise OSError('Not a valid PNG file.')
+
+    int_map = {1: '>B', 2: '>H', 4: '>I'}
+    unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
+
+    chunks = []
+
+    while header:
+        length = unpack_integer(header[:4])
+        header = header[4:]
+
+        chunk_type = header[:4]
+        header = header[4:]
+
+        chunk_data = header[:length]
+        header = header[length:]
+
+        header = header[4:]  # Skip CRC
+
+        chunks.append({
+            'type': chunk_type,
+            'length': length,
+            'data': chunk_data
+        })
+
+    ihdr = chunks[0]['data']
+
+    width = unpack_integer(ihdr[:4])
+    height = unpack_integer(ihdr[4:8])
+
+    idat = b''
+
+    for chunk in chunks:
+        if chunk['type'] == b'IDAT':
+            idat += chunk['data']
+
+    if not idat:
+        raise OSError('Unable to read PNG data.')
+
+    decompressed_data = bytearray(zlib.decompress(idat))
+
+    stride = width * 3
+    pixels = []
+
+    def _get_pixel(idx):
+        x = idx % stride
+        y = idx // stride
+        return pixels[y][x]
+
+    for y in range(height):
+        basePos = y * (1 + stride)
+        filter_type = decompressed_data[basePos]
+
+        current_row = []
+
+        pixels.append(current_row)
+
+        for x in range(stride):
+            color = decompressed_data[1 + basePos + x]
+            basex = y * stride + x
+            left = 0
+            up = 0
+
+            if x > 2:
+                left = _get_pixel(basex - 3)
+            if y > 0:
+                up = _get_pixel(basex - stride)
+
+            if filter_type == 1:  # Sub
+                color = (color + left) & 0xff
+            elif filter_type == 2:  # Up
+                color = (color + up) & 0xff
+            elif filter_type == 3:  # Average
+                color = (color + ((left + up) >> 1)) & 0xff
+            elif filter_type == 4:  # Paeth
+                a = left
+                b = up
+                c = 0
+
+                if x > 2 and y > 0:
+                    c = _get_pixel(basex - stride - 3)
+
+                p = a + b - c
+
+                pa = abs(p - a)
+                pb = abs(p - b)
+                pc = abs(p - c)
+
+                if pa <= pb and pa <= pc:
+                    color = (color + a) & 0xff
+                elif pb <= pc:
+                    color = (color + b) & 0xff
+                else:
+                    color = (color + c) & 0xff
+
+            current_row.append(color)
+
+    return width, height, pixels
+
+
+def register_socks_protocols():
+    # "Register" SOCKS protocols
+    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+        if scheme not in urllib.parse.uses_netloc:
+            urllib.parse.uses_netloc.append(scheme)
+
+
+def handle_youtubedl_headers(headers):
+    filtered_headers = headers
+
+    if 'Youtubedl-no-compression' in filtered_headers:
+        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
+        del filtered_headers['Youtubedl-no-compression']
+
+    return filtered_headers
yt_dlp/utils/_utils.py
@@ -47,26 +47,20 @@
 import xml.etree.ElementTree
 import zlib

-from .compat import functools  # isort: split
-from .compat import (
+from . import traversal
+from ..compat import functools  # isort: split
+from ..compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
    compat_shlex_quote,
 )
-from .dependencies import brotli, certifi, websockets, xattr
-from .socks import ProxyType, sockssocket
+from ..dependencies import brotli, certifi, websockets, xattr
+from ..socks import ProxyType, sockssocket


-def register_socks_protocols():
-    # "Register" SOCKS protocols
-    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
-    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
-    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
-        if scheme not in urllib.parse.uses_netloc:
-            urllib.parse.uses_netloc.append(scheme)
-
+__name__ = __name__.rsplit('.', 1)[0]  # Pretend to be the parent module

 # This is not clearly defined otherwise
 compiled_regex_type = type(re.compile(''))
@@ -136,8 +130,13 @@ def random_user_agent():
 }


-NO_DEFAULT = object()
-IDENTITY = lambda x: x
+class NO_DEFAULT:
+    pass
+
+
+def IDENTITY(x):
+    return x


 ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
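NO_DEFAULT stays a sentinel after this change; identity checks against it still distinguish "no default supplied" from an explicit default=None, and the class form prints a readable name when it leaks into a repr. A small sketch of the sentinel pattern the change preserves (get_field is an illustrative helper, not yt-dlp API):

    class NO_DEFAULT:
        pass

    def get_field(info, key, default=NO_DEFAULT):
        if key in info:
            return info[key]
        if default is NO_DEFAULT:  # identity check: caller supplied no default
            raise KeyError(key)
        return default

    print(get_field({'id': 'x'}, 'id'))           # 'x'
    print(get_field({'id': 'x'}, 'title', None))  # None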
@@ -224,6 +223,7 @@ def random_user_agent():
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
+    '%H:%M %d/%m/%Y',
 ])

 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
@@ -928,27 +928,6 @@ def run(cls, *args, timeout=None, **kwargs):
        return stdout or default, stderr or default, proc.returncode


-def get_subprocess_encoding():
-    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
-        # For subprocess calls, encode with locale encoding
-        # Refer to http://stackoverflow.com/a/9951851/35070
-        encoding = preferredencoding()
-    else:
-        encoding = sys.getfilesystemencoding()
-    if encoding is None:
-        encoding = 'utf-8'
-    return encoding
-
-
-def encodeFilename(s, for_subprocess=False):
-    assert isinstance(s, str)
-    return s
-
-
-def decodeFilename(b, for_subprocess=False):
-    return b
-
-
 def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
@@ -956,20 +935,6 @@ def encodeArgument(s):
    return s if isinstance(s, str) else s.decode('ascii')


-def decodeArgument(b):
-    return b
-
-
-def decodeOption(optval):
-    if optval is None:
-        return optval
-    if isinstance(optval, bytes):
-        optval = optval.decode(preferredencoding())
-
-    assert isinstance(optval, str)
-    return optval
-
-
 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))

@@ -1034,7 +999,7 @@ def make_HTTPS_handler(params, **kwargs):

    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
-        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
+        if certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
@@ -1068,7 +1033,7 @@ def make_HTTPS_handler(params, **kwargs):


 def bug_reports_message(before=';'):
-    from .update import REPOSITORY
+    from ..update import REPOSITORY

    msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
@@ -1351,25 +1316,12 @@ def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_a
    return hc


-def handle_youtubedl_headers(headers):
-    filtered_headers = headers
-
-    if 'Youtubedl-no-compression' in filtered_headers:
-        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
-        del filtered_headers['Youtubedl-no-compression']
-
-    return filtered_headers
-
-
 class YoutubeDLHandler(urllib.request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
-    the standard headers to every HTTP request and handles gzipped and
-    deflated responses from web servers. If compression is to be avoided in
-    a particular request, the original request in the program code only has
-    to include the HTTP header "Youtubedl-no-compression", which will be
-    removed before making the real request.
+    the standard headers to every HTTP request and handles gzipped, deflated and
+    brotli responses from web servers.

    Part of this code was copied from:

@@ -1410,6 +1362,23 @@ def brotli(data):
            return data
        return brotli.decompress(data)

+    @staticmethod
+    def gz(data):
+        gz = gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb')
+        try:
+            return gz.read()
+        except OSError as original_oserror:
+            # There may be junk at the end of the file
+            # See http://stackoverflow.com/q/4928560/35070 for details
+            for i in range(1, 1024):
+                try:
+                    gz = gzip.GzipFile(fileobj=io.BytesIO(data[:-i]), mode='rb')
+                    return gz.read()
+                except OSError:
+                    continue
+            else:
+                raise original_oserror
+
    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
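The gz helper above retries decompression while shaving trailing bytes, to survive servers that append junk after the gzip stream. A standalone demonstration of the same fallback path (the junk payload is fabricated):

    import gzip
    import io

    def gz_decode(data):
        try:
            return gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb').read()
        except OSError as original_oserror:
            # Trailing junk breaks the multi-member read: retry with the tail trimmed
            for i in range(1, 1024):
                try:
                    return gzip.GzipFile(fileobj=io.BytesIO(data[:-i]), mode='rb').read()
                except OSError:
                    continue
            raise original_oserror

    payload = gzip.compress(b'hello') + b'!!junk!!'
    print(gz_decode(payload))  # b'hello'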
@@ -1432,44 +1401,32 @@ def http_request(self, req):
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

+        if 'Youtubedl-no-compression' in req.headers:  # deprecated
+            req.headers.pop('Youtubedl-no-compression', None)
+            req.add_header('Accept-encoding', 'identity')
+
        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

-        req.headers = handle_youtubedl_headers(req.headers)
-
        return super().do_request_(req)

    def http_response(self, req, resp):
        old_resp = resp
-        # gzip
-        if resp.headers.get('Content-encoding', '') == 'gzip':
-            content = resp.read()
-            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
-            try:
-                uncompressed = io.BytesIO(gz.read())
-            except OSError as original_ioerror:
-                # There may be junk add the end of the file
-                # See http://stackoverflow.com/q/4928560/35070 for details
-                for i in range(1, 1024):
-                    try:
-                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
-                        uncompressed = io.BytesIO(gz.read())
-                    except OSError:
-                        continue
-                    break
-                else:
-                    raise original_ioerror
-            resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
-            resp.msg = old_resp.msg
-        # deflate
-        if resp.headers.get('Content-encoding', '') == 'deflate':
-            gz = io.BytesIO(self.deflate(resp.read()))
-            resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
-            resp.msg = old_resp.msg
-        # brotli
-        if resp.headers.get('Content-encoding', '') == 'br':
-            resp = urllib.request.addinfourl(
-                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
+
+        # Content-Encoding header lists the encodings in order that they were applied [1].
+        # To decompress, we simply do the reverse.
+        # [1]: https://datatracker.ietf.org/doc/html/rfc9110#name-content-encoding
+        decoded_response = None
+        for encoding in (e.strip() for e in reversed(resp.headers.get('Content-encoding', '').split(','))):
+            if encoding == 'gzip':
+                decoded_response = self.gz(decoded_response or resp.read())
+            elif encoding == 'deflate':
+                decoded_response = self.deflate(decoded_response or resp.read())
+            elif encoding == 'br' and brotli:
+                decoded_response = self.brotli(decoded_response or resp.read())
+
+        if decoded_response is not None:
+            resp = urllib.request.addinfourl(io.BytesIO(decoded_response), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
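Per RFC 9110, Content-Encoding lists codings in the order they were applied, so the new http_response walks the list right to left. A compact sketch of that loop handling only gzip and deflate (the real handler also decodes brotli when the optional dependency is present):

    import gzip
    import io
    import zlib

    def decode_body(body, content_encoding):
        decoded = None
        # The rightmost coding was applied last, so it must be undone first
        for encoding in (e.strip() for e in reversed(content_encoding.split(','))):
            if encoding == 'gzip':
                decoded = gzip.GzipFile(fileobj=io.BytesIO(decoded or body)).read()
            elif encoding == 'deflate':
                decoded = zlib.decompress(decoded or body)
        return body if decoded is None else decoded

    payload = gzip.compress(zlib.compress(b'hello'))
    print(decode_body(payload, 'deflate, gzip'))  # b'hello'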
@@ -1565,136 +1522,6 @@ def is_path_like(f):
    return isinstance(f, (str, bytes, os.PathLike))


-class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
-    """
-    See [1] for cookie file format.
-
-    1. https://curl.haxx.se/docs/http-cookies.html
-    """
-    _HTTPONLY_PREFIX = '#HttpOnly_'
-    _ENTRY_LEN = 7
-    _HEADER = '''# Netscape HTTP Cookie File
-# This file is generated by yt-dlp. Do not edit.
-
-'''
-    _CookieFileEntry = collections.namedtuple(
-        'CookieFileEntry',
-        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
-
-    def __init__(self, filename=None, *args, **kwargs):
-        super().__init__(None, *args, **kwargs)
-        if is_path_like(filename):
-            filename = os.fspath(filename)
-        self.filename = filename
-
-    @staticmethod
-    def _true_or_false(cndn):
-        return 'TRUE' if cndn else 'FALSE'
-
-    @contextlib.contextmanager
-    def open(self, file, *, write=False):
-        if is_path_like(file):
-            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
-                yield f
-        else:
-            if write:
-                file.truncate(0)
-            yield file
-
-    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
-        now = time.time()
-        for cookie in self:
-            if (not ignore_discard and cookie.discard
-                    or not ignore_expires and cookie.is_expired(now)):
-                continue
-            name, value = cookie.name, cookie.value
-            if value is None:
-                # cookies.txt regards 'Set-Cookie: foo' as a cookie
-                # with no name, whereas http.cookiejar regards it as a
-                # cookie with no value.
-                name, value = '', name
-            f.write('%s\n' % '\t'.join((
-                cookie.domain,
-                self._true_or_false(cookie.domain.startswith('.')),
-                cookie.path,
-                self._true_or_false(cookie.secure),
-                str_or_none(cookie.expires, default=''),
-                name, value
-            )))
-
-    def save(self, filename=None, *args, **kwargs):
-        """
-        Save cookies to a file.
-        Code is taken from CPython 3.6
-        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
-
-        if filename is None:
-            if self.filename is not None:
-                filename = self.filename
-            else:
-                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
-
-        # Store session cookies with `expires` set to 0 instead of an empty string
-        for cookie in self:
-            if cookie.expires is None:
-                cookie.expires = 0
-
-        with self.open(filename, write=True) as f:
-            f.write(self._HEADER)
-            self._really_save(f, *args, **kwargs)
-
-    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
-        """Load cookies from a file."""
-        if filename is None:
-            if self.filename is not None:
-                filename = self.filename
-            else:
-                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
-
-        def prepare_line(line):
-            if line.startswith(self._HTTPONLY_PREFIX):
-                line = line[len(self._HTTPONLY_PREFIX):]
-            # comments and empty lines are fine
-            if line.startswith('#') or not line.strip():
-                return line
-            cookie_list = line.split('\t')
-            if len(cookie_list) != self._ENTRY_LEN:
-                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
-            cookie = self._CookieFileEntry(*cookie_list)
-            if cookie.expires_at and not cookie.expires_at.isdigit():
-                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
-            return line
-
-        cf = io.StringIO()
-        with self.open(filename) as f:
-            for line in f:
-                try:
-                    cf.write(prepare_line(line))
-                except http.cookiejar.LoadError as e:
-                    if f'{line.strip()} '[0] in '[{"':
-                        raise http.cookiejar.LoadError(
-                            'Cookies file must be Netscape formatted, not JSON. See '
-                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
-                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
-                    continue
-        cf.seek(0)
-        self._really_load(cf, filename, ignore_discard, ignore_expires)
-        # Session cookies are denoted by either `expires` field set to
-        # an empty string or 0. MozillaCookieJar only recognizes the former
-        # (see [1]). So we need force the latter to be recognized as session
-        # cookies on our own.
-        # Session cookies may be important for cookies-based authentication,
-        # e.g. usually, when user does not check 'Remember me' check box while
-        # logging in on a site, some important cookies are stored as session
-        # cookies so that not recognizing them will result in failed login.
-        # 1. https://bugs.python.org/issue17164
-        for cookie in self:
-            # Treat `expires=0` cookies as session cookies
-            if cookie.expires == 0:
-                cookie.expires = None
-                cookie.discard = True


 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
@@ -1711,61 +1538,44 @@ class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):

    The code is based on HTTPRedirectHandler implementation from CPython [1].

-    This redirect handler solves two issues:
-     - ensures redirect URL is always unicode under python 2
-     - introduces support for experimental HTTP response status code
-       308 Permanent Redirect [2] used by some sites [3]
+    This redirect handler fixes and improves the logic to better align with RFC7231
+    and what browsers tend to do [2][3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
-    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
-    3. https://github.com/ytdl-org/youtube-dl/issues/28768
+    2. https://datatracker.ietf.org/doc/html/rfc7231
+    3. https://github.com/python/cpython/issues/91306
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
-        """Return a Request or None in response to a redirect.
-
-        This is called by the http_error_30x methods when a
-        redirection response is received. If a redirection should
-        take place, return a new Request to allow http_error_30x to
-        perform the redirect. Otherwise, raise HTTPError if no-one
-        else should try to handle this url. Return None if you can't
-        but another Handler might.
-        """
-        m = req.get_method()
-        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
-                 or code in (301, 302, 303) and m == "POST")):
+        if code not in (301, 302, 303, 307, 308):
            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
-        # Strictly (according to RFC 2616), 301 or 302 in response to
-        # a POST MUST NOT cause a redirection without confirmation
-        # from the user (of urllib.request, in this case). In practice,
-        # essentially all clients do redirect in this case, so we do
-        # the same.
-
-        # Be conciliant with URIs containing a space. This is mainly
-        # redundant with the more complete encoding done in http_error_302(),
-        # but it is kept for compatibility with other callers.
-        newurl = newurl.replace(' ', '%20')
-
-        CONTENT_HEADERS = ("content-length", "content-type")
-        # NB: don't use dict comprehension for python 2.6 compatibility
-        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
-
+
+        new_method = req.get_method()
+        new_data = req.data
+        remove_headers = []
        # A 303 must either use GET or HEAD for subsequent request
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
-        if code == 303 and m != 'HEAD':
-            m = 'GET'
+        if code == 303 and req.get_method() != 'HEAD':
+            new_method = 'GET'
        # 301 and 302 redirects are commonly turned into a GET from a POST
        # for subsequent requests by browsers, so we'll do the same.
        # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
|
# https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
|
||||||
# https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
|
# https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
|
||||||
if code in (301, 302) and m == 'POST':
|
elif code in (301, 302) and req.get_method() == 'POST':
|
||||||
m = 'GET'
|
new_method = 'GET'
|
||||||
|
|
||||||
|
# only remove payload if method changed (e.g. POST to GET)
|
||||||
|
if new_method != req.get_method():
|
||||||
|
new_data = None
|
||||||
|
remove_headers.extend(['Content-Length', 'Content-Type'])
|
||||||
|
|
||||||
|
new_headers = {k: v for k, v in req.headers.items() if k.lower() not in remove_headers}
|
||||||
|
|
||||||
return urllib.request.Request(
|
return urllib.request.Request(
|
||||||
newurl, headers=newheaders, origin_req_host=req.origin_req_host,
|
newurl, headers=new_headers, origin_req_host=req.origin_req_host,
|
||||||
unverifiable=True, method=m)
|
unverifiable=True, method=new_method, data=new_data)
|
||||||
|
|
||||||
|
|
||||||
def extract_timezone(date_str):
|
def extract_timezone(date_str):
|
||||||
|
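The net effect of the reworked redirect_request() is easiest to see on a redirected POST; a sketch under the assumption that YoutubeDLRedirectHandler is importable (the URLs are illustrative):

    import urllib.request

    req = urllib.request.Request('https://example.com/submit', data=b'a=1', method='POST')
    handler = YoutubeDLRedirectHandler()

    # 303 See Other: retried as GET, payload dropped
    new_req = handler.redirect_request(req, None, 303, 'See Other', {}, 'https://example.com/done')
    assert new_req.get_method() == 'GET' and new_req.data is None

    # 307 Temporary Redirect: method and payload preserved
    new_req = handler.redirect_request(req, None, 307, 'Temporary Redirect', {}, 'https://example.com/done')
    assert new_req.get_method() == 'POST' and new_req.data == b'a=1'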
@@ -2011,20 +1821,14 @@ def __contains__(self, date):
         date = date_from_str(date)
         return self.start <= date <= self.end
 
-    def __str__(self):
-        return f'{self.start.isoformat()} - {self.end.isoformat()}'
+    def __repr__(self):
+        return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'
 
     def __eq__(self, other):
         return (isinstance(other, DateRange)
                 and self.start == other.start and self.end == other.end)
 
 
-def platform_name():
-    """ Returns the platform name as a str """
-    deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
-    return platform.platform()
-
-
 @functools.cache
 def system_identifier():
     python_implementation = platform.python_implementation()
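DateRange now reports itself via __repr__ rather than __str__, trading the human-readable span for an unambiguous, eval-style form. A stand-in mirroring the change (class name and dates are illustrative, not yt-dlp's):

    import datetime

    class _Range:  # hypothetical stand-in for DateRange
        def __init__(self, start, end):
            self.start, self.end = start, end

        def __repr__(self):  # the new form
            return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'

    r = _Range(datetime.date(2023, 1, 1), datetime.date(2023, 2, 1))
    # old __str__ produced:  2023-01-01 - 2023-02-01
    # new __repr__ produces: __main__._Range('2023-01-01', '2023-02-01')
    print(repr(r))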
@@ -2076,7 +1880,7 @@ def write_string(s, out=None, encoding=None):
 
 
 def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
-    from . import _IN_CLI
+    from .. import _IN_CLI
     if _IN_CLI:
         if msg in deprecation_warning._cache:
             return
@@ -3286,14 +3090,10 @@ def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO
 
 
 def variadic(x, allowed_types=NO_DEFAULT):
-    return x if is_iterable_like(x, blocked_types=allowed_types) else (x,)
-
-
-def dict_get(d, key_or_keys, default=None, skip_false_values=True):
-    for val in map(d.get, variadic(key_or_keys)):
-        if val is not None and (val or not skip_false_values):
-            return val
-    return default
+    if not isinstance(allowed_types, (tuple, type)):
+        deprecation_warning('allowed_types should be a tuple or a type')
+        allowed_types = tuple(allowed_types)
+    return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )
 
 
 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
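variadic() keeps its contract (wrap anything that is not iterable-like into a 1-tuple) but now tolerates, with a deprecation warning, allowed_types passed as a list instead of a tuple or type. Expected behaviour, sketched as comments since the helper lives inside yt_dlp.utils (values illustrative):

    # variadic('spam')                        -> ('spam', )   strings count as a single item
    # variadic(['spam', 'eggs'])              -> ['spam', 'eggs']
    # variadic((1, 2), allowed_types=tuple)   -> ((1, 2), )   tuple treated as one item
    # variadic((1, 2), allowed_types=[tuple])  warns, then behaves like the line above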
@@ -3533,7 +3333,7 @@ def is_outdated_version(version, limit, assume_new=True):
 def ytdl_is_updateable():
     """ Returns if yt-dlp can be updated with -U """
 
-    from .update import is_non_updateable
+    from ..update import is_non_updateable
 
     return not is_non_updateable()
 
@@ -3543,10 +3343,6 @@ def args_to_str(args):
     return ' '.join(compat_shlex_quote(a) for a in args)
 
 
-def error_to_compat_str(err):
-    return str(err)
-
-
 def error_to_str(err):
     return f'{type(err).__name__}: {err}'
 
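error_to_str keeps the exception type visible, which the removed error_to_compat_str (a bare str() call) did not. For example, assuming error_to_str is imported from yt_dlp.utils:

    err = ValueError('invalid literal')
    str(err)           # -> 'invalid literal'
    error_to_str(err)  # -> 'ValueError: invalid literal'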
@@ -3633,7 +3429,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
     mimetype = mt.partition(';')[0].strip().lower()
     _, _, subtype = mimetype.rpartition('/')
 
-    ext = traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
+    ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
     if ext:
         return ext
     elif default is not NO_DEFAULT:
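For context, mimetype2ext resolves in three steps: the full mimetype, then the bare subtype, then the last '+'-suffix component. A condensed sketch with a hypothetical, much smaller MAP than the real table:

    MAP = {'x-matroska': 'mkv', 'webm': 'webm', 'svg+xml': 'svg'}  # illustrative only

    def _lookup(mt):
        mimetype = mt.partition(';')[0].strip().lower()
        _, _, subtype = mimetype.rpartition('/')
        # full mimetype first, then bare subtype, then the last "+"-suffix part
        for key in (mimetype, subtype, subtype.rsplit('+')[-1]):
            if key in MAP:
                return MAP[key]

    assert _lookup('video/webm; codecs="vp9"') == 'webm'
    assert _lookup('image/svg+xml') == 'svg'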
@@ -3665,7 +3461,7 @@ def parse_codecs(codecs_str):
                 vcodec = full_codec
                 if parts[0] in ('dvh1', 'dvhe'):
                     hdr = 'DV'
-                elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
+                elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
                     hdr = 'HDR10'
                 elif parts[:2] == ['vp9', '2']:
                     hdr = 'HDR10'
@@ -3711,8 +3507,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
         },
     }
 
-    sanitize_codec = functools.partial(
-        try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
+    sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
     vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
 
     for ext in preferences or COMPATIBLE_CODECS.keys():
@@ -5093,12 +4888,6 @@ def decode_base_n(string, n=None, table=None):
     return result
 
 
-def decode_base(value, digits):
-    deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
-                        f'in a future version. Use {__name__}.decode_base_n instead')
-    return decode_base_n(value, table=digits)
-
-
 def decode_packed_codes(code):
     mobj = re.search(PACKED_CODES_RE, code)
     obfuscated_code, base, count, symbols = mobj.groups()
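decode_base_n remains the supported helper; the removed decode_base was only a deprecation shim around it. Assuming the default digit table (0-9, then a-z, then A-Z), its behaviour is:

    assert decode_base_n('ff', 16) == 255
    assert decode_base_n('z', 36) == 35
    assert decode_base_n('10', 2) == 2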
@@ -5143,113 +4932,6 @@ def urshift(val, n):
     return val >> n if val >= 0 else (val + 0x100000000) >> n
 
 
-# Based on png2str() written by @gdkchan and improved by @yokrysty
-# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
-def decode_png(png_data):
-    # Reference: https://www.w3.org/TR/PNG/
-    header = png_data[8:]
-
-    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
-        raise OSError('Not a valid PNG file.')
-
-    int_map = {1: '>B', 2: '>H', 4: '>I'}
-    unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
-
-    chunks = []
-
-    while header:
-        length = unpack_integer(header[:4])
-        header = header[4:]
-
-        chunk_type = header[:4]
-        header = header[4:]
-
-        chunk_data = header[:length]
-        header = header[length:]
-
-        header = header[4:]  # Skip CRC
-
-        chunks.append({
-            'type': chunk_type,
-            'length': length,
-            'data': chunk_data
-        })
-
-    ihdr = chunks[0]['data']
-
-    width = unpack_integer(ihdr[:4])
-    height = unpack_integer(ihdr[4:8])
-
-    idat = b''
-
-    for chunk in chunks:
-        if chunk['type'] == b'IDAT':
-            idat += chunk['data']
-
-    if not idat:
-        raise OSError('Unable to read PNG data.')
-
-    decompressed_data = bytearray(zlib.decompress(idat))
-
-    stride = width * 3
-    pixels = []
-
-    def _get_pixel(idx):
-        x = idx % stride
-        y = idx // stride
-        return pixels[y][x]
-
-    for y in range(height):
-        basePos = y * (1 + stride)
-        filter_type = decompressed_data[basePos]
-
-        current_row = []
-
-        pixels.append(current_row)
-
-        for x in range(stride):
-            color = decompressed_data[1 + basePos + x]
-            basex = y * stride + x
-            left = 0
-            up = 0
-
-            if x > 2:
-                left = _get_pixel(basex - 3)
-            if y > 0:
-                up = _get_pixel(basex - stride)
-
-            if filter_type == 1:  # Sub
-                color = (color + left) & 0xff
-            elif filter_type == 2:  # Up
-                color = (color + up) & 0xff
-            elif filter_type == 3:  # Average
-                color = (color + ((left + up) >> 1)) & 0xff
-            elif filter_type == 4:  # Paeth
-                a = left
-                b = up
-                c = 0
-
-                if x > 2 and y > 0:
-                    c = _get_pixel(basex - stride - 3)
-
-                p = a + b - c
-
-                pa = abs(p - a)
-                pb = abs(p - b)
-                pc = abs(p - c)
-
-                if pa <= pb and pa <= pc:
-                    color = (color + a) & 0xff
-                elif pb <= pc:
-                    color = (color + b) & 0xff
-                else:
-                    color = (color + c) & 0xff
-
-            current_row.append(color)
-
-    return width, height, pixels
-
-
 def write_xattr(path, key, value):
     # Windows: Write xattrs to NTFS Alternate Data Streams:
     # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
@@ -5408,8 +5090,8 @@ def to_high_limit_path(path):
 
 
 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
-    val = traverse_obj(obj, *variadic(field))
-    if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
+    val = traversal.traverse_obj(obj, *variadic(field))
+    if not val if ignore is NO_DEFAULT else val in variadic(ignore):
         return default
     return template % func(val)
 
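Note the behavioural edge of the new ignore test: with ignore left at NO_DEFAULT, a value of 0 previously survived (via the `val != 0` escape) but now falls through to the default. Roughly:

    # old: format_field({'n': 0}, 'n', '%d')  -> '0'
    # new: format_field({'n': 0}, 'n', '%d')  -> ''   (the empty default)
    format_field({'height': 1080}, 'height', '%sp')  # -> '1080p'
    format_field({}, 'height', '%sp', default='?')   # -> '?'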
@@ -5446,12 +5128,12 @@ def make_dir(path, to_screen=None):
         return True
     except OSError as err:
         if callable(to_screen) is not None:
-            to_screen('unable to create directory ' + error_to_compat_str(err))
+            to_screen(f'unable to create directory {err}')
         return False
 
 
 def get_executable_path():
-    from .update import _get_variant_and_executable_path
+    from ..update import _get_variant_and_executable_path
 
     return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
 
@@ -5475,244 +5157,6 @@ def get_system_config_dirs(package_name):
     yield os.path.join('/etc', package_name)
 
 
-def traverse_obj(
-        obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
-        casesense=True, is_user_input=False, traverse_string=False):
-    """
-    Safely traverse nested `dict`s and `Iterable`s
-
-    >>> obj = [{}, {"key": "value"}]
-    >>> traverse_obj(obj, (1, "key"))
-    "value"
-
-    Each of the provided `paths` is tested and the first producing a valid result will be returned.
-    The next path will also be tested if the path branched but no results could be found.
-    Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
-    Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
-
-    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
-
-    The keys in the path can be one of:
-        - `None`:           Return the current object.
-        - `set`:            Requires the only item in the set to be a type or function,
-                            like `{type}`/`{func}`. If a `type`, returns only values
-                            of this type. If a function, returns `func(obj)`.
-        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
-        - `slice`:          Branch out and return all values in `obj[key]`.
-        - `Ellipsis`:       Branch out and return a list of all values.
-        - `tuple`/`list`:   Branch out and return a list of all matching values.
-                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
-        - `function`:       Branch out and return values filtered by the function.
-                            Read as: `[value for key, value in obj if function(key, value)]`.
-                            For `Iterable`s, `key` is the index of the value.
-                            For `re.Match`es, `key` is the group number (0 = full match)
-                            as well as additionally any group names, if given.
-        - `dict`            Transform the current object and return a matching dict.
-                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
-
-        `tuple`, `list`, and `dict` all support nested paths and branches.
-
-    @params paths           Paths which to traverse by.
-    @param default          Value to return if the paths do not match.
-                            If the last key in the path is a `dict`, it will apply to each value inside
-                            the dict instead, depth first. Try to avoid if using nested `dict` keys.
-    @param expected_type    If a `type`, only accept final values of this type.
-                            If any other callable, try to call the function on each result.
-                            If the last key in the path is a `dict`, it will apply to each value inside
-                            the dict instead, recursively. This does respect branching paths.
-    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
-    @param casesense        If `False`, consider string dictionary keys as case insensitive.
-
-    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
-
-    @param is_user_input    Whether the keys are generated from user input.
-                            If `True` strings get converted to `int`/`slice` if needed.
-    @param traverse_string  Whether to traverse into objects as strings.
-                            If `True`, any non-compatible object will first be
-                            converted into a string and then traversed into.
-                            The return value of that path will be a string instead,
-                            not respecting any further branching.
-
-
-    @returns                The result of the object traversal.
-                            If successful, `get_all=True`, and the path branches at least once,
-                            then a list of results is returned instead.
-                            If no `default` is given and the last path branches, a `list` of results
-                            is always returned. If a path ends on a `dict` that result will always be a `dict`.
-    """
-    casefold = lambda k: k.casefold() if isinstance(k, str) else k
-
-    if isinstance(expected_type, type):
-        type_test = lambda val: val if isinstance(val, expected_type) else None
-    else:
-        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
-
-    def apply_key(key, obj, is_last):
-        branching = False
-        result = None
-
-        if obj is None and traverse_string:
-            if key is ... or callable(key) or isinstance(key, slice):
-                branching = True
-                result = ()
-
-        elif key is None:
-            result = obj
-
-        elif isinstance(key, set):
-            assert len(key) == 1, 'Set should only be used to wrap a single item'
-            item = next(iter(key))
-            if isinstance(item, type):
-                if isinstance(obj, item):
-                    result = obj
-            else:
-                result = try_call(item, args=(obj,))
-
-        elif isinstance(key, (list, tuple)):
-            branching = True
-            result = itertools.chain.from_iterable(
-                apply_path(obj, branch, is_last)[0] for branch in key)
-
-        elif key is ...:
-            branching = True
-            if isinstance(obj, collections.abc.Mapping):
-                result = obj.values()
-            elif is_iterable_like(obj):
-                result = obj
-            elif isinstance(obj, re.Match):
-                result = obj.groups()
-            elif traverse_string:
-                branching = False
-                result = str(obj)
-            else:
-                result = ()
-
-        elif callable(key):
-            branching = True
-            if isinstance(obj, collections.abc.Mapping):
-                iter_obj = obj.items()
-            elif is_iterable_like(obj):
-                iter_obj = enumerate(obj)
-            elif isinstance(obj, re.Match):
-                iter_obj = itertools.chain(
-                    enumerate((obj.group(), *obj.groups())),
-                    obj.groupdict().items())
-            elif traverse_string:
-                branching = False
-                iter_obj = enumerate(str(obj))
-            else:
-                iter_obj = ()
-
-            result = (v for k, v in iter_obj if try_call(key, args=(k, v)))
-            if not branching:  # string traversal
-                result = ''.join(result)
-
-        elif isinstance(key, dict):
-            iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items())
-            result = {
-                k: v if v is not None else default for k, v in iter_obj
-                if v is not None or default is not NO_DEFAULT
-            } or None
-
-        elif isinstance(obj, collections.abc.Mapping):
-            result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
-                      next((v for k, v in obj.items() if casefold(k) == key), None))
-
-        elif isinstance(obj, re.Match):
-            if isinstance(key, int) or casesense:
-                with contextlib.suppress(IndexError):
-                    result = obj.group(key)
-
-            elif isinstance(key, str):
-                result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
-
-        elif isinstance(key, (int, slice)):
-            if is_iterable_like(obj, collections.abc.Sequence):
-                branching = isinstance(key, slice)
-                with contextlib.suppress(IndexError):
-                    result = obj[key]
-            elif traverse_string:
-                with contextlib.suppress(IndexError):
-                    result = str(obj)[key]
-
-        return branching, result if branching else (result,)
-
-    def lazy_last(iterable):
-        iterator = iter(iterable)
-        prev = next(iterator, NO_DEFAULT)
-        if prev is NO_DEFAULT:
-            return
-
-        for item in iterator:
-            yield False, prev
-            prev = item
-
-        yield True, prev
-
-    def apply_path(start_obj, path, test_type):
-        objs = (start_obj,)
-        has_branched = False
-
-        key = None
-        for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
-            if is_user_input and isinstance(key, str):
-                if key == ':':
-                    key = ...
-                elif ':' in key:
-                    key = slice(*map(int_or_none, key.split(':')))
-                elif int_or_none(key) is not None:
-                    key = int(key)
-
-            if not casesense and isinstance(key, str):
-                key = key.casefold()
-
-            if __debug__ and callable(key):
-                # Verify function signature
-                inspect.signature(key).bind(None, None)
-
-            new_objs = []
-            for obj in objs:
-                branching, results = apply_key(key, obj, last)
-                has_branched |= branching
-                new_objs.append(results)
-
-            objs = itertools.chain.from_iterable(new_objs)
-
-            if test_type and not isinstance(key, (dict, list, tuple)):
-                objs = map(type_test, objs)
-
-        return objs, has_branched, isinstance(key, dict)
-
-    def _traverse_obj(obj, path, allow_empty, test_type):
-        results, has_branched, is_dict = apply_path(obj, path, test_type)
-        results = LazyList(item for item in results if item not in (None, {}))
-        if get_all and has_branched:
-            if results:
-                return results.exhaust()
-            if allow_empty:
-                return [] if default is NO_DEFAULT else default
-            return None
-
-        return results[0] if results else {} if allow_empty and is_dict else None
-
-    for index, path in enumerate(paths, 1):
-        result = _traverse_obj(obj, path, index == len(paths), True)
-        if result is not None:
-            return result
-
-    return None if default is NO_DEFAULT else default
-
-
-def traverse_dict(dictn, keys, casesense=True):
-    deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
-                        f'in a future version. Use "{__name__}.traverse_obj" instead')
-    return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
-
-
-def get_first(obj, keys, **kwargs):
-    return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
-
-
 def time_seconds(**kwargs):
     """
     Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)
@@ -5808,7 +5252,7 @@ def number_of_digits(number):
 
 
 def join_nonempty(*values, delim='-', from_dict=None):
    if from_dict is not None:
-        values = (traverse_obj(from_dict, variadic(v)) for v in values)
+        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
     return delim.join(map(str, filter(None, values)))
 
 
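join_nonempty itself is unchanged apart from the traversal import path: falsy parts are dropped and the rest stringified, with from_dict values resolved through traverse_obj first. For example:

    join_nonempty('mp4', None, 1080, '')                      # -> 'mp4-1080'
    join_nonempty('a', 'b', delim='.')                        # -> 'a.b'
    join_nonempty('width', 'height',
                  from_dict={'width': 1920, 'height': 1080})  # -> '1920-1080'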
@@ -6519,15 +5963,3 @@ def calculate_preference(self, format):
             format['abr'] = format.get('tbr') - format.get('vbr', 0)
 
         return tuple(self._calculate_field_preference(format, field) for field in self._order)
 
 
-# Deprecated
-has_certifi = bool(certifi)
-has_websockets = bool(websockets)
-
-
-def load_plugins(name, suffix, namespace):
-    from .plugins import load_plugins
-    ret = load_plugins(name, suffix)
-    namespace.update(ret)
-    return ret
254 yt_dlp/utils/traversal.py Normal file
@@ -0,0 +1,254 @@
+import collections.abc
+import contextlib
+import inspect
+import itertools
+import re
+
+from ._utils import (
+    IDENTITY,
+    NO_DEFAULT,
+    LazyList,
+    int_or_none,
+    is_iterable_like,
+    try_call,
+    variadic,
+)
+
+
+def traverse_obj(
+        obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
+        casesense=True, is_user_input=False, traverse_string=False):
+    """
+    Safely traverse nested `dict`s and `Iterable`s
+
+    >>> obj = [{}, {"key": "value"}]
+    >>> traverse_obj(obj, (1, "key"))
+    "value"
+
+    Each of the provided `paths` is tested and the first producing a valid result will be returned.
+    The next path will also be tested if the path branched but no results could be found.
+    Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
+    Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
+
+    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
+
+    The keys in the path can be one of:
+        - `None`:           Return the current object.
+        - `set`:            Requires the only item in the set to be a type or function,
+                            like `{type}`/`{func}`. If a `type`, returns only values
+                            of this type. If a function, returns `func(obj)`.
+        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
+        - `slice`:          Branch out and return all values in `obj[key]`.
+        - `Ellipsis`:       Branch out and return a list of all values.
+        - `tuple`/`list`:   Branch out and return a list of all matching values.
+                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
+        - `function`:       Branch out and return values filtered by the function.
+                            Read as: `[value for key, value in obj if function(key, value)]`.
+                            For `Iterable`s, `key` is the index of the value.
+                            For `re.Match`es, `key` is the group number (0 = full match)
+                            as well as additionally any group names, if given.
+        - `dict`            Transform the current object and return a matching dict.
+                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
+
+        `tuple`, `list`, and `dict` all support nested paths and branches.
+
+    @params paths           Paths which to traverse by.
+    @param default          Value to return if the paths do not match.
+                            If the last key in the path is a `dict`, it will apply to each value inside
+                            the dict instead, depth first. Try to avoid if using nested `dict` keys.
+    @param expected_type    If a `type`, only accept final values of this type.
+                            If any other callable, try to call the function on each result.
+                            If the last key in the path is a `dict`, it will apply to each value inside
+                            the dict instead, recursively. This does respect branching paths.
+    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
+    @param casesense        If `False`, consider string dictionary keys as case insensitive.
+
+    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
+
+    @param is_user_input    Whether the keys are generated from user input.
+                            If `True` strings get converted to `int`/`slice` if needed.
+    @param traverse_string  Whether to traverse into objects as strings.
+                            If `True`, any non-compatible object will first be
+                            converted into a string and then traversed into.
+                            The return value of that path will be a string instead,
+                            not respecting any further branching.
+
+
+    @returns                The result of the object traversal.
+                            If successful, `get_all=True`, and the path branches at least once,
+                            then a list of results is returned instead.
+                            If no `default` is given and the last path branches, a `list` of results
+                            is always returned. If a path ends on a `dict` that result will always be a `dict`.
+    """
+    casefold = lambda k: k.casefold() if isinstance(k, str) else k
+
+    if isinstance(expected_type, type):
+        type_test = lambda val: val if isinstance(val, expected_type) else None
+    else:
+        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
+
+    def apply_key(key, obj, is_last):
+        branching = False
+        result = None
+
+        if obj is None and traverse_string:
+            if key is ... or callable(key) or isinstance(key, slice):
+                branching = True
+                result = ()
+
+        elif key is None:
+            result = obj
+
+        elif isinstance(key, set):
+            assert len(key) == 1, 'Set should only be used to wrap a single item'
+            item = next(iter(key))
+            if isinstance(item, type):
+                if isinstance(obj, item):
+                    result = obj
+            else:
+                result = try_call(item, args=(obj,))
+
+        elif isinstance(key, (list, tuple)):
+            branching = True
+            result = itertools.chain.from_iterable(
+                apply_path(obj, branch, is_last)[0] for branch in key)
+
+        elif key is ...:
+            branching = True
+            if isinstance(obj, collections.abc.Mapping):
+                result = obj.values()
+            elif is_iterable_like(obj):
+                result = obj
+            elif isinstance(obj, re.Match):
+                result = obj.groups()
+            elif traverse_string:
+                branching = False
+                result = str(obj)
+            else:
+                result = ()
+
+        elif callable(key):
+            branching = True
+            if isinstance(obj, collections.abc.Mapping):
+                iter_obj = obj.items()
+            elif is_iterable_like(obj):
+                iter_obj = enumerate(obj)
+            elif isinstance(obj, re.Match):
+                iter_obj = itertools.chain(
+                    enumerate((obj.group(), *obj.groups())),
+                    obj.groupdict().items())
+            elif traverse_string:
+                branching = False
+                iter_obj = enumerate(str(obj))
+            else:
+                iter_obj = ()
+
+            result = (v for k, v in iter_obj if try_call(key, args=(k, v)))
+            if not branching:  # string traversal
+                result = ''.join(result)
+
+        elif isinstance(key, dict):
+            iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items())
+            result = {
+                k: v if v is not None else default for k, v in iter_obj
+                if v is not None or default is not NO_DEFAULT
+            } or None
+
+        elif isinstance(obj, collections.abc.Mapping):
+            result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
+                      next((v for k, v in obj.items() if casefold(k) == key), None))
+
+        elif isinstance(obj, re.Match):
+            if isinstance(key, int) or casesense:
+                with contextlib.suppress(IndexError):
+                    result = obj.group(key)
+
+            elif isinstance(key, str):
+                result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
+
+        elif isinstance(key, (int, slice)):
+            if is_iterable_like(obj, collections.abc.Sequence):
+                branching = isinstance(key, slice)
+                with contextlib.suppress(IndexError):
+                    result = obj[key]
+            elif traverse_string:
+                with contextlib.suppress(IndexError):
+                    result = str(obj)[key]
+
+        return branching, result if branching else (result,)
+
+    def lazy_last(iterable):
+        iterator = iter(iterable)
+        prev = next(iterator, NO_DEFAULT)
+        if prev is NO_DEFAULT:
+            return
+
+        for item in iterator:
+            yield False, prev
+            prev = item
+
+        yield True, prev
+
+    def apply_path(start_obj, path, test_type):
+        objs = (start_obj,)
+        has_branched = False
+
+        key = None
+        for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
+            if is_user_input and isinstance(key, str):
+                if key == ':':
+                    key = ...
+                elif ':' in key:
+                    key = slice(*map(int_or_none, key.split(':')))
+                elif int_or_none(key) is not None:
+                    key = int(key)
+
+            if not casesense and isinstance(key, str):
+                key = key.casefold()
+
+            if __debug__ and callable(key):
+                # Verify function signature
+                inspect.signature(key).bind(None, None)
+
+            new_objs = []
+            for obj in objs:
+                branching, results = apply_key(key, obj, last)
+                has_branched |= branching
+                new_objs.append(results)
+
+            objs = itertools.chain.from_iterable(new_objs)
+
+            if test_type and not isinstance(key, (dict, list, tuple)):
+                objs = map(type_test, objs)
+
+        return objs, has_branched, isinstance(key, dict)
+
+    def _traverse_obj(obj, path, allow_empty, test_type):
+        results, has_branched, is_dict = apply_path(obj, path, test_type)
+        results = LazyList(item for item in results if item not in (None, {}))
+        if get_all and has_branched:
+            if results:
+                return results.exhaust()
+            if allow_empty:
+                return [] if default is NO_DEFAULT else default
+            return None
+
+        return results[0] if results else {} if allow_empty and is_dict else None
+
+    for index, path in enumerate(paths, 1):
+        result = _traverse_obj(obj, path, index == len(paths), True)
+        if result is not None:
+            return result
+
+    return None if default is NO_DEFAULT else default
+
+
+def get_first(obj, *paths, **kwargs):
+    return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)
+
+
+def dict_get(d, key_or_keys, default=None, skip_false_values=True):
+    for val in map(d.get, variadic(key_or_keys)):
+        if val is not None and (val or not skip_false_values):
+            return val
+    return default
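A short usage sketch for the relocated helpers, assuming the package is importable (the data is illustrative):

    from yt_dlp.utils.traversal import dict_get, get_first, traverse_obj

    data = {'formats': [{'url': 'https://a.example/v.mp4', 'height': 720},
                        {'url': 'https://a.example/v.webm'}]}

    traverse_obj(data, ('formats', 0, 'url'))        # 'https://a.example/v.mp4'
    traverse_obj(data, ('formats', ..., 'height'))   # [720]  (branching path)
    traverse_obj(data, ('formats', 0, {'link': 'url', 'res': 'height'}))
    # -> {'link': 'https://a.example/v.mp4', 'res': 720}  (dict key transform)

    get_first([{'id': None}, {'id': 'abc'}], 'id')   # 'abc'  (first non-empty match)
    dict_get({'a': '', 'b': 'x'}, ('a', 'b'))        # 'x'    ('' is skipped as falsy)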