Compare commits: 2022.10.04...2022.11.11
129 commits
| SHA1 |
|---|
| 5e39fb982e |
| 8b644025b1 |
| 7aaf4cd2a8 |
| 8522226d2f |
| f4b2c59cfe |
| 7c8c63529e |
| e4221b700f |
| bd7e919a75 |
| f7fc8d39e9 |
| a6858cda29 |
| 17fc3dc48a |
| 3f5c216969 |
| e72e48c53f |
| 0cf643b234 |
| dc3028d233 |
| 4dc23a8051 |
| 495322b95b |
| c789fb7787 |
| ed6bec168d |
| 0d8affc17f |
| d9df9b4919 |
| efdc45a6ea |
| 86973308cd |
| c61473c1d6 |
| 8fddc232bf |
| fad689c7b6 |
| db6fa6960c |
| 3b87f4d943 |
| 581e86b512 |
| 8196182a12 |
| 9b383177c9 |
| fbb0ee7747 |
| c7e4ab278a |
| e9ce4e9250 |
| 5da08bde9e |
| ff48fc04d0 |
| 46d09f8707 |
| db4678e448 |
| a349d4d641 |
| ac8e69dd32 |
| 96b9e9cf62 |
| cb1553e966 |
| 0d2a0ecac3 |
| c94df4d19d |
| 728f4b5c2e |
| 8c188d5d09 |
| e14ea7fbd9 |
| 7053aa3a48 |
| 049565df2e |
| cc1d3bf96b |
| 5b9f253fa0 |
| d715b0e413 |
| 6141346d18 |
| 59a0c35865 |
| da9a60ca0d |
| 0d113603ac |
| 2e30b46fe4 |
| 68a9a450d4 |
| ed13a772d7 |
| 78545664bf |
| f72218c199 |
| 58fb927ebd |
| 62b8dac490 |
| 682b4524bf |
| 9da6612b0f |
| e63faa101c |
| 497074f044 |
| c90c5b9bdd |
| ad97487606 |
| e091fb92da |
| c9bd65185c |
| c66ed4e2e5 |
| 2530b68d44 |
| 7d61d2306e |
| 385adffcf5 |
| 0c908911f9 |
| c13a301a94 |
| f47cf86eff |
| 7a26ce2641 |
| 3639df54c3 |
| a4713ba96d |
| 5318156f1c |
| d5d1df8afd |
| cd5df121f3 |
| 73ac0e6b85 |
| a7ddbc0475 |
| 8fab23301c |
| 1338ae3ba3 |
| 63c547d71c |
| 814bba3933 |
| 2576d53a31 |
| 217753f4aa |
| 42a44f01c3 |
| 9b9dad119a |
| 6dca2aa66d |
| 6678a4f0b3 |
| d51b2816e3 |
| 34f00179db |
| 5225df50cf |
| 94dc8604dd |
| a71b812f53 |
| c6989aa3ae |
| a79bf78397 |
| 82fb2357d9 |
| 13b2ae29c2 |
| 36069409ec |
| 0468a3b325 |
| d509c1f5a3 |
| 2c98d99818 |
| 226c0f3a54 |
| ade1fa70cb |
| 4c9a1a3ba5 |
| 1d55ebabc9 |
| f324fe8c59 |
| 866f037344 |
| 5d14b73491 |
| 540236ce11 |
| 7b0127e1e1 |
| f99bbfc983 |
| 3b55aaac59 |
| 2e565f5bca |
| e02e6d86db |
| 867c66ff97 |
| f03940963e |
| 09c127ff83 |
| aebb4f4ba7 |
| bf2e1ec67a |
| 98d4ec1ef2 |
| 1305b659ef |

(Author and date columns were not captured in this extract.)
.github/ISSUE_TEMPLATE/1_broken_site.yml (vendored): 8 changes

@@ -18,7 +18,7 @@ body:
     options:
       - label: I'm reporting a broken site
         required: true
-      - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+      - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
         required: true
       - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
         required: true
@@ -62,7 +62,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -70,8 +70,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2022.10.04, Current version: 2022.10.04
-        yt-dlp is up to date (2022.10.04)
+        Latest version: 2022.11.11, Current version: 2022.11.11
+        yt-dlp is up to date (2022.11.11)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/2_site_support_request.yml (vendored)

@@ -18,7 +18,7 @@ body:
     options:
       - label: I'm reporting a new site support request
        required: true
-      - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+      - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
        required: true
       - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
        required: true
@@ -74,7 +74,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -82,8 +82,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2022.10.04, Current version: 2022.10.04
-        yt-dlp is up to date (2022.10.04)
+        Latest version: 2022.11.11, Current version: 2022.11.11
+        yt-dlp is up to date (2022.11.11)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/3_site_feature_request.yml (vendored)

@@ -18,7 +18,7 @@ body:
     options:
       - label: I'm requesting a site-specific feature
        required: true
-      - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+      - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
        required: true
       - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
        required: true
@@ -70,7 +70,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -78,8 +78,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2022.10.04, Current version: 2022.10.04
-        yt-dlp is up to date (2022.10.04)
+        Latest version: 2022.11.11, Current version: 2022.11.11
+        yt-dlp is up to date (2022.11.11)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/4_bug_report.yml (vendored): 8 changes

@@ -18,7 +18,7 @@ body:
     options:
       - label: I'm reporting a bug unrelated to a specific site
        required: true
-      - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+      - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
        required: true
       - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
        required: true
@@ -55,7 +55,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -63,8 +63,8 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2022.10.04, Current version: 2022.10.04
-        yt-dlp is up to date (2022.10.04)
+        Latest version: 2022.11.11, Current version: 2022.11.11
+        yt-dlp is up to date (2022.11.11)
         <more lines>
       render: shell
     validations:
.github/ISSUE_TEMPLATE/5_feature_request.yml (vendored): 8 changes

@@ -20,7 +20,7 @@ body:
         required: true
       - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
         required: true
-      - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+      - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
         required: true
       - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
         required: true
@@ -51,7 +51,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -59,7 +59,7 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2022.10.04, Current version: 2022.10.04
-        yt-dlp is up to date (2022.10.04)
+        Latest version: 2022.11.11, Current version: 2022.11.11
+        yt-dlp is up to date (2022.11.11)
         <more lines>
       render: shell
.github/ISSUE_TEMPLATE/6_question.yml (vendored): 8 changes

@@ -26,7 +26,7 @@ body:
         required: true
       - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
         required: true
-      - label: I've verified that I'm running yt-dlp version **2022.10.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+      - label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
         required: true
       - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
         required: true
@@ -57,7 +57,7 @@ body:
         [debug] Command-line config: ['-vU', 'test:youtube']
         [debug] Portable config "yt-dlp.conf": ['-i']
         [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-        [debug] yt-dlp version 2022.10.04 [9d339c4] (win32_exe)
+        [debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe)
         [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
         [debug] Checking exe version: ffmpeg -bsfs
         [debug] Checking exe version: ffprobe -bsfs
@@ -65,7 +65,7 @@ body:
         [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
         [debug] Proxy map: {}
         [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-        Latest version: 2022.10.04, Current version: 2022.10.04
-        yt-dlp is up to date (2022.10.04)
+        Latest version: 2022.11.11, Current version: 2022.11.11
+        yt-dlp is up to date (2022.11.11)
         <more lines>
       render: shell
.github/workflows/build.yml (vendored): 99 changes

@@ -1,8 +1,12 @@
 name: Build
 on: workflow_dispatch
+permissions:
+  contents: read

 jobs:
   prepare:
+    permissions:
+      contents: write  # for push_release
     runs-on: ubuntu-latest
     outputs:
       version_suffix: ${{ steps.version_suffix.outputs.version_suffix }}
@@ -21,7 +25,7 @@ jobs:
         env:
           PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }}
         if: "env.PUSH_VERSION_COMMIT == ''"
-        run: echo ::set-output name=version_suffix::$(date -u +"%H%M%S")
+        run: echo "version_suffix=$(date -u +"%H%M%S")" >> "$GITHUB_OUTPUT"
       - name: Bump version
         id: bump_version
         run: |
@@ -36,7 +40,7 @@ jobs:
           git add -u
           git commit -m "[version] update" -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl"
           git push origin --force ${{ github.event.ref }}:release
-          echo ::set-output name=head_sha::$(git rev-parse HEAD)
+          echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
       - name: Update master
         env:
           PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }}
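The two `run:` changes above track GitHub's deprecation of the `::set-output` workflow command in favour of the `$GITHUB_OUTPUT` file. A minimal sketch of the two forms inside a step script (illustrative, not part of this diff):

```sh
# Deprecated: step outputs were set via a magic string on stdout
echo "::set-output name=head_sha::$(git rev-parse HEAD)"

# Current: append key=value to the file that $GITHUB_OUTPUT points at
echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
```

Either way, downstream steps consume the value the same way, e.g. `${{ steps.bump_version.outputs.head_sha }}`.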
@@ -46,32 +50,46 @@ jobs:

   build_unix:
     needs: prepare
-    runs-on: ubuntu-18.04  # Standalone executable should be built on minimum supported OS
+    runs-on: ubuntu-latest

     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
         with:
           python-version: '3.10'
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          miniforge-variant: Mambaforge
+          use-mamba: true
+          channels: conda-forge
+          auto-update-conda: true
+          activate-environment: ''
+          auto-activate-base: false
       - name: Install Requirements
         run: |
-          sudo apt-get -y install zip pandoc man
-          python -m pip install --upgrade pip setuptools wheel twine
-          python -m pip install Pyinstaller -r requirements.txt
+          sudo apt-get -y install zip pandoc man sed
+          python -m pip install -U pip setuptools wheel twine
+          python -m pip install -U Pyinstaller -r requirements.txt
+          reqs=$(mktemp)
+          echo -e 'python=3.10.*\npyinstaller' >$reqs
+          sed 's/^brotli.*/brotli-python/' <requirements.txt >>$reqs
+          mamba create -n build --file $reqs

       - name: Prepare
         run: |
           python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }}
           python devscripts/make_lazy_extractors.py
-      - name: Build Unix executables
+      - name: Build Unix platform-independent binary
         run: |
           make all tar
+      - name: Build Unix standalone binary
+        shell: bash -l {0}
+        run: |
+          unset LD_LIBRARY_PATH  # Harmful; set by setup-python
+          conda activate build
+          python pyinst.py --onedir
+          (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
+          python pyinst.py
       - name: Get SHA2-SUMS
         id: get_sha
         run: |

       - name: Upload artifacts
         uses: actions/upload-artifact@v3
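The standalone Linux binary is now built inside a conda/mamba environment rather than the runner's system Python, which is what the changelog entry "Make linux binary truly standalone using `conda`" refers to. A hedged sketch of reproducing the build locally, assuming a working mamba install (note `brotli` is rewritten because conda-forge publishes it as `brotli-python`):

```sh
# Mirrors the "Install Requirements" and "Build Unix standalone binary" steps above
reqs=$(mktemp)
printf 'python=3.10.*\npyinstaller\n' > "$reqs"
sed 's/^brotli.*/brotli-python/' < requirements.txt >> "$reqs"  # conda-forge package name
mamba create -n build --file "$reqs"
conda activate build && python pyinst.py
```

`shell: bash -l {0}` is needed in the workflow because `conda activate` only works in a login shell where conda's shell hook has been run.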
@@ -113,6 +131,49 @@ jobs:
           git -C taps/ push


+  build_linux_arm:
+    permissions:
+      packages: write  # for Creating cache
+    runs-on: ubuntu-latest
+    needs: prepare
+    strategy:
+      matrix:
+        architecture:
+          - armv7
+          - aarch64
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          path: ./repo
+      - name: Virtualized Install, Prepare & Build
+        uses: yt-dlp/run-on-arch-action@v2
+        with:
+          githubToken: ${{ github.token }}  # To cache image
+          arch: ${{ matrix.architecture }}
+          distro: ubuntu18.04  # Standalone executable should be built on minimum supported OS
+          dockerRunArgs: --volume "${PWD}/repo:/repo"
+          install: |  # Installing Python 3.10 from the Deadsnakes repo raises errors
+            apt update
+            apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip
+            python3.8 -m pip install -U pip setuptools wheel
+            # Cannot access requirements.txt from the repo directory at this stage
+            python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi
+
+          run: |
+            cd repo
+            python3.8 -m pip install -U Pyinstaller -r requirements.txt  # Cached version may be out of date
+            python3.8 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }}
+            python3.8 devscripts/make_lazy_extractors.py
+            python3.8 pyinst.py
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          path: |  # run-on-arch-action designates armv7l as armv7
+            repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
+
+
   build_macos:
     runs-on: macos-11
     needs: prepare
@@ -193,8 +254,8 @@ jobs:
           python-version: '3.8'
       - name: Install Requirements
         run: |  # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
-          python -m pip install --upgrade pip setuptools wheel py2exe
-          pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt
+          python -m pip install -U pip setuptools wheel py2exe
+          pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt

       - name: Prepare
         run: |
@@ -229,8 +290,8 @@ jobs:
           architecture: 'x86'
       - name: Install Requirements
         run: |
-          python -m pip install --upgrade pip setuptools wheel
-          pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt
+          python -m pip install -U pip setuptools wheel
+          pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt

       - name: Prepare
         run: |
@@ -248,8 +309,10 @@ jobs:


   publish_release:
+    permissions:
+      contents: write  # for action-gh-release
     runs-on: ubuntu-latest
-    needs: [prepare, build_unix, build_windows, build_windows32, build_macos, build_macos_legacy]
+    needs: [prepare, build_unix, build_linux_arm, build_windows, build_windows32, build_macos, build_macos_legacy]

     steps:
       - uses: actions/checkout@v3
@@ -276,6 +339,8 @@ jobs:
           sha256sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-256SUMS
           sha256sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-256SUMS
           sha256sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-256SUMS
+          sha256sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " yt-dlp_linux_armv7l"}' >> SHA2-256SUMS
+          sha256sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-256SUMS
           sha256sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-256SUMS
           sha256sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-256SUMS
           sha512sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-512SUMS
@@ -287,6 +352,8 @@ jobs:
           sha512sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-512SUMS
           sha512sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-512SUMS
           sha512sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-512SUMS
+          sha512sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " yt-dlp_linux_armv7l"}' >> SHA2-512SUMS
+          sha512sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-512SUMS
           sha512sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-512SUMS
           sha512sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-512SUMS

@@ -319,6 +386,8 @@ jobs:
           artifact/yt-dlp_macos
           artifact/yt-dlp_macos.zip
           artifact/yt-dlp_macos_legacy
+          artifact/yt-dlp_linux_armv7l
+          artifact/yt-dlp_linux_aarch64
           artifact/dist/yt-dlp_linux
           artifact/dist/yt-dlp_linux.zip
           _update_spec
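Since the new armv7l/aarch64 binaries are checksummed alongside the rest, a release download can be verified against the published sums, e.g. (an illustrative check; filenames per the artifact list above):

```sh
# Compare the published and locally computed digests by eye (or diff them)
grep yt-dlp_linux_aarch64 SHA2-256SUMS
sha256sum yt-dlp_linux_aarch64
```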
.github/workflows/core.yml (vendored): 3 changes

@@ -1,5 +1,8 @@
 name: Core Tests
 on: [push, pull_request]
+permissions:
+  contents: read

 jobs:
   tests:
     name: Core Tests
.github/workflows/download.yml (vendored): 3 changes

@@ -1,5 +1,8 @@
 name: Download Tests
 on: [push, pull_request]
+permissions:
+  contents: read

 jobs:
   quick:
     name: Quick Download Tests
.github/workflows/quick-test.yml (vendored): 3 changes

@@ -1,5 +1,8 @@
 name: Quick Test
 on: [push, pull_request]
+permissions:
+  contents: read

 jobs:
   tests:
     name: Core Test
CONTRIBUTORS: 26 changes

@@ -331,3 +331,29 @@ tannertechnology
 Timendum
 tobi1805
 TokyoBlackHole
+ajayyy
+Alienmaster
+bsun0000
+changren-wcr
+ClosedPort22
+CrankDatSouljaBoy
+cruel-efficiency
+endotronic
+Generator
+gibson042
+How-Bout-No
+invertico
+jahway603
+jwoglom
+lksj
+megapro17
+mlampe
+MrOctopus
+nosoop
+puc9
+sashashura
+schnusch
+SG5
+the-marenga
+tkgmomosheep
+vitkhab
Changelog.md: 121 changes

@@ -11,6 +11,127 @@
 -->

+### 2022.11.11
+
+* Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)
+* Backport SSL configuration from Python 3.10 by [coletdjnz](https://github.com/coletdjnz)
+* Do more processing in `--flat-playlist`
+* Fix `--list` options not implying `-s` in some cases by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* Fix end time of clips by [cruel-efficiency](https://github.com/cruel-efficiency)
+* Fix for `formats=None`
+* Write API params in debug head
+* [outtmpl] Ensure ASCII in json and add option for Unicode
+* [SponsorBlock] Add `type` field, obey `--retry-sleep extractor`, relax duration check for large segments
+* [SponsorBlock] **Support `chapter` category** by [ajayyy](https://github.com/ajayyy), [pukkandan](https://github.com/pukkandan)
+* [ThumbnailsConvertor] Fix filename escaping by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [ModifyChapters] Handle the entire video being marked for removal
+* [embedthumbnail] Fix thumbnail name in mp3 by [How-Bout-No](https://github.com/How-Bout-No)
+* [downloader/fragment] HLS download can continue without first fragment
+* [cookies] Improve `LenientSimpleCookie` by [Grub4K](https://github.com/Grub4K)
+* [jsinterp] Improve separating regex
+* [extractor/common] Fix `fatal=False` for `_search_nuxt_data`
+* [extractor/common] Improve `_generic_title`
+* [extractor/common] Fix `json_ld` type checks by [Grub4K](https://github.com/Grub4K)
+* [extractor/generic] Separate embed extraction into own function
+* [extractor/generic:quoted-html] Add extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/unsupported] Raise error on known DRM-only sites by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `js_to_json`: Improve escape handling by [Grub4K](https://github.com/Grub4K)
+* [utils] `strftime_or_none`: Workaround Python bug on Windows
+* [utils] `traverse_obj`: Always return list when branching, allow `re.Match` objects by [Grub4K](https://github.com/Grub4K)
+* [build, test] Harden workflows' security by [sashashura](https://github.com/sashashura)
+* [build] `py2exe`: Migrate to freeze API by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [build] Create `armv7l` and `aarch64` releases by [MrOctopus](https://github.com/MrOctopus), [pukkandan](https://github.com/pukkandan)
+* [build] Make linux binary truly standalone using `conda` by [mlampe](https://github.com/mlampe)
+* [build] Replace `set-output` with `GITHUB_OUTPUT` by [Lesmiscore](https://github.com/Lesmiscore)
+* [update] Use error code `100` for update errors
+* [compat] Fix `shutils.move` in restricted ACL mode on BSD by [ClosedPort22](https://github.com/ClosedPort22), [pukkandan](https://github.com/pukkandan)
+* [docs, devscripts] Document `pyinst`'s argument passthrough by [jahway603](https://github.com/jahway603)
+* [test] Allow `extract_flat` in download tests by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [Alienmaster](https://github.com/Alienmaster)
+* [extractor/aeon] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/agora] Add extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/camsoda] Add extractor by [zulaport](https://github.com/zulaport)
+* [extractor/cinetecamilano] Add extractor by [timendum](https://github.com/timendum)
+* [extractor/deuxm] Add extractors by [CrankDatSouljaBoy](https://github.com/CrankDatSouljaBoy)
+* [extractor/genius] Add extractors by [bashonly](https://github.com/bashonly)
+* [extractor/japandiet] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/listennotes] Add extractor by [lksj](https://github.com/lksj), [pukkandan](https://github.com/pukkandan)
+* [extractor/nos.nl] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/oftv] Add extractors by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/podbayfm] Add extractor by [schnusch](https://github.com/schnusch)
+* [extractor/qingting] Add extractor by [bashonly](https://github.com/bashonly), [changren-wcr](https://github.com/changren-wcr)
+* [extractor/screen9] Add extractor by [tpikonen](https://github.com/tpikonen)
+* [extractor/swearnet] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/YleAreena] Add extractor by [pukkandan](https://github.com/pukkandan), [vitkhab](https://github.com/vitkhab)
+* [extractor/zeenews] Add extractor by [m4tu4g](https://github.com/m4tu4g), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube:tab] **Update tab handling for redesign** by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+    * Channel URLs download all uploads of the channel as multiple playlists, separated by tab
+* [extractor/youtube] Differentiate between no comments and disabled comments by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Extract `concurrent_view_count` for livestreams by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix `duration` for premieres by [nosoop](https://github.com/nosoop)
+* [extractor/youtube] Fix `live_status` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Ignore incomplete data error for comment replies by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Improve chapter parsing from description
+* [extractor/youtube] Mark videos as fully watched by [bsun0000](https://github.com/bsun0000)
+* [extractor/youtube] Update piped instances by [Generator](https://github.com/Generator)
+* [extractor/youtube] Update playlist metadata extraction for new layout by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Fix video metadata from tabs by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube:tab] Let `approximate_date` return timestamp
+* [extractor/americastestkitchen] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/bbc] Support onion domains by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/bilibili] Add chapters and misc cleanup by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bilibili] Fix BilibiliIE and Bangumi extractors by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Better error for geo-restricted videos by [flashdagger](https://github.com/flashdagger)
+* [extractor/bitchute] Improve `BitChuteChannelIE` by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/bitchute] Simplify extractor by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+* [extractor/cda] Support login through API by [selfisekai](https://github.com/selfisekai)
+* [extractor/crunchyroll] Beta is now the only layout by [tejing1](https://github.com/tejing1)
+* [extractor/detik] Avoid unnecessary extraction
+* [extractor/doodstream] Remove extractor
+* [extractor/dplay] Add MotorTrendOnDemand extractor by [bashonly](https://github.com/bashonly)
+* [extractor/epoch] Support videos without data-trailer by [gibson042](https://github.com/gibson042), [pukkandan](https://github.com/pukkandan)
+* [extractor/fox] Extract thumbnail by [vitkhab](https://github.com/vitkhab)
+* [extractor/foxnews] Add `FoxNewsVideo` extractor
+* [extractor/hotstar] Add season support by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/hotstar] Refactor v1 API calls
+* [extractor/iprima] Make json+ld non-fatal by [bashonly](https://github.com/bashonly)
+* [extractor/iq] Increase phantomjs timeout
+* [extractor/kaltura] Support playlists by [jwoglom](https://github.com/jwoglom), [pukkandan](https://github.com/pukkandan)
+* [extractor/lbry] Authenticate with cookies by [flashdagger](https://github.com/flashdagger)
+* [extractor/livestreamfails] Support posts by [invertico](https://github.com/invertico)
+* [extractor/mlb] Add `MLBArticle` extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/mxplayer] Improve extractor by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/niconico] Always use HTTPS for requests
+* [extractor/nzherald] Support new video embed by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/odnoklassniki] Support boosty.to embeds by [Lesmiscore](https://github.com/Lesmiscore), [megapro17](https://github.com/megapro17), [pukkandan](https://github.com/pukkandan)
+* [extractor/paramountplus] Update API token by [bashonly](https://github.com/bashonly)
+* [extractor/reddit] Add fallback format by [bashonly](https://github.com/bashonly)
+* [extractor/redgifs] Fix extractors by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/redgifs] Refresh auth token for 401 by [endotronic](https://github.com/endotronic), [pukkandan](https://github.com/pukkandan)
+* [extractor/rumble] Add HLS formats and extract more metadata by [flashdagger](https://github.com/flashdagger)
+* [extractor/sbs] Improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
+* [extractor/skyit] Fix extractors by [nixxo](https://github.com/nixxo)
+* [extractor/stripchat] Fix hostname for HLS stream by [zulaport](https://github.com/zulaport)
+* [extractor/stripchat] Improve error message by [freezboltz](https://github.com/freezboltz)
+* [extractor/telegram] Add playlist support and more metadata by [bashonly](https://github.com/bashonly), [bsun0000](https://github.com/bsun0000)
+* [extractor/Tnaflix] Fix for HTTP 500 by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [extractor/tubitv] Better DRM detection by [bashonly](https://github.com/bashonly)
+* [extractor/tvp] Update extractors by [selfisekai](https://github.com/selfisekai)
+* [extractor/twitcasting] Fix `data-movie-playlist` extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/twitter] Add onion site to `_VALID_URL` by [DoubleCouponDay](https://github.com/DoubleCouponDay)
+* [extractor/twitter] Add Spaces extractor and GraphQL API by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [extractor/twitter] Support multi-video posts by [Grub4K](https://github.com/Grub4K)
+* [extractor/uktvplay] Fix `_VALID_URL`
+* [extractor/viu] Support subtitles of on-screen text by [tkgmomosheep](https://github.com/tkgmomosheep)
+* [extractor/VK] Fix playlist URLs by [the-marenga](https://github.com/the-marenga)
+* [extractor/vlive] Extract `release_timestamp`
+* [extractor/voot] Improve `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
+* [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/YoutubeWebArchive] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/zee5] Improve `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/zenyandex] Fix extractors by [lksj](https://github.com/lksj), [puc9](https://github.com/puc9), [pukkandan](https://github.com/pukkandan)
+
+
 ### 2022.10.04

 * Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly)
README.md: 46 changes

@@ -12,7 +12,7 @@
 [](LICENSE "License")
 [](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
-[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
+[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")

 </div>
 <!-- MANPAGE: END EXCLUDED SECTION -->

(The badge image URLs were stripped in this extract; the changed parameters of the commit-history badge are inside the lost image URL.)
@@ -74,7 +74,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t

 # NEW FEATURES

-* Merged with **youtube-dl v2021.12.17+ [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+* Merged with **youtube-dl v2021.12.17+ [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)** <!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)

 * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

@@ -88,7 +88,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
     * Supports some (but not all) age-gated content without cookies
     * Download livestreams from the start using `--live-from-start` (*experimental*)
     * `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
-    * Redirect channel's home URL automatically to `/video` to preserve the old behaviour
+    * Channel URLs download all uploads of the channel, including shorts and live

 * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
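An illustrative invocation of the container syntax (the profile and container names here are made up):

```sh
# Read cookies from the "Personal" container of Firefox's "default-release" profile
yt-dlp --cookies-from-browser "firefox:default-release::Personal" URL
```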
@@ -142,7 +142,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
 * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
 * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
 * Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
-* YouTube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
+* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
 * Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
 * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
 * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
@@ -201,6 +201,8 @@ File|Description
 [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows))
 [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
 [yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update)
+[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary
+[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary
 [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
 [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update)
 [yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable
@@ -277,6 +279,8 @@ To build the standalone executable, you must have Python and `pyinstaller` (plus

 On some systems, you may need to use `py` or `python` instead of `python3`.

+`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
+
 Note that pyinstaller with versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.

 **Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
@@ -1042,7 +1046,7 @@ Make chapter entries for, or remove various segments (sponsor,
                                     for, separated by commas. Available
                                     categories are sponsor, intro, outro,
                                     selfpromo, preview, filler, interaction,
-                                    music_offtopic, poi_highlight, all and
+                                    music_offtopic, poi_highlight, chapter, all and
                                     default (=all). You can prefix the category
                                     with a "-" to exclude it. See [1] for
                                     description of the categories. E.g.
@@ -1054,8 +1058,8 @@ Make chapter entries for, or remove various segments (sponsor,
                                     remove takes precedence. The syntax and
                                     available categories are the same as for
                                     --sponsorblock-mark except that "default"
-                                    refers to "all,-filler" and poi_highlight is
-                                    not available
+                                    refers to "all,-filler" and poi_highlight and
+                                    chapter are not available
     --sponsorblock-chapter-title TEMPLATE
                                     An output template for the title of the
                                     SponsorBlock chapters created by
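An example of the new category in use (illustrative, not part of the diff):

```sh
# Create chapters for SponsorBlock-submitted "chapter" segments and highlights,
# while cutting sponsor segments out of the file entirely
yt-dlp --sponsorblock-mark chapter,poi_highlight --sponsorblock-remove sponsor URL
```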
@@ -1189,9 +1193,9 @@ The field names themselves (the part inside the parenthesis) can also have some

 1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`

-1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
+1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)

-1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
+1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
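A quick sketch of the new `+` flag for the `j` conversion above (illustrative):

```sh
# Without "+", non-ASCII characters in the JSON output are \u-escaped;
# with it, the title is printed as-is
yt-dlp --print "%(title)+j" URL
```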
To summarize, the general syntax for a field is:
```
%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
```

@@ -1200,6 +1204,10 @@ To summarize, the general syntax for a field is:

 Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.

+<a id="outtmpl-postprocess-note"></a>
+
+Note: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete.
+
 The available fields are:

 - `id` (string): Video identifier
@@ -1226,6 +1234,7 @@ The available fields are:
 - `duration` (numeric): Length of the video in seconds
 - `duration_string` (string): Length of the video (HH:mm:ss)
 - `view_count` (numeric): How many users have watched the video on the platform
+- `concurrent_view_count` (numeric): How many users are currently watching the video on the platform.
 - `like_count` (numeric): Number of positive ratings of the video
 - `dislike_count` (numeric): Number of negative ratings of the video
 - `repost_count` (numeric): Number of reposts of the video
@@ -1299,7 +1308,7 @@ Available only when using `--download-sections` and for `chapter:` prefix when u
 Available only when used in `--print`:

 - `urls` (string): The URLs of all requested formats, one in each line
-- `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete
+- `filename` (string): Name of the video file. Note that the [actual filename may differ](#outtmpl-postprocess-note)
 - `formats_table` (table): The video format table as printed by `--list-formats`
 - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
 - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
@@ -1310,10 +1319,11 @@ Available only in `--sponsorblock-chapter-title`:

 - `start_time` (numeric): Start time of the chapter in seconds
 - `end_time` (numeric): End time of the chapter in seconds
-- `categories` (list): The SponsorBlock categories the chapter belongs to
+- `categories` (list): The [SponsorBlock categories](https://wiki.sponsor.ajay.app/w/Types#Category) the chapter belongs to
 - `category` (string): The smallest SponsorBlock category the chapter belongs to
 - `category_names` (list): Friendly names of the categories
 - `name` (string): Friendly name of the smallest category
+- `type` (string): The [SponsorBlock action type](https://wiki.sponsor.ajay.app/w/Types#Action_Type) of the chapter

 Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
@@ -1638,9 +1648,9 @@ The metadata obtained by the extractors can be modified by using `--parse-metada

 `--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.

-The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
+The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.

-Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
+Note that these options preserve their relative order, allowing replacements to be made in parsed fields and viceversa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
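For instance (an illustrative sketch; the field name and pattern are invented):

```sh
# Parse an "artist" field out of the description, then use it in the
# filename, falling back to the uploader when the pattern does not match
yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)" \
       -o "%(artist,uploader)s - %(title)s.%(ext)s" URL
```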
This option also has a few special uses:
@@ -1723,17 +1733,13 @@ The following extractors use this feature:

 #### youtubetab (YouTube playlists, channels, feeds, etc.)
 * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
-* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off
+* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
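A hedged example of passing the extractor argument (`PLAYLIST_URL` is a placeholder):

```sh
# approximate_date now also yields a usable timestamp in --flat-playlist mode
yt-dlp --flat-playlist --extractor-args "youtubetab:approximate_date" \
       --print "%(upload_date)s %(title)s" PLAYLIST_URL
```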
 #### funimation
 * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
 * `version`: The video version to extract - `uncut` or `simulcast`

-#### crunchyroll
-* `language`: Audio languages to extract, e.g. `crunchyroll:language=jaJp`
-* `hardsub`: Which hard-sub versions to extract, e.g. `crunchyroll:hardsub=None,enUS`
-
-#### crunchyrollbeta
+#### crunchyrollbeta (Crunchyroll)
 * `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
 * `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
@@ -1761,6 +1767,8 @@ The following extractors use this feature:
 #### rokfinchannel
 * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

+#### twitter
+* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided
+
 NOTE: These options may be changed/removed in the future without concern for backward compatibility
devscripts/update-version.py

@@ -50,5 +50,7 @@ UPDATE_HINT = None
 '''

 write_file('yt_dlp/version.py', VERSION_FILE)
-print(f'::set-output name=ytdlp_version::{VERSION}')
+github_output = os.getenv('GITHUB_OUTPUT')
+if github_output:
+    write_file(github_output, f'ytdlp_version={VERSION}\n', 'a')
 print(f'\nVersion = {VERSION}, Git HEAD = {GIT_HEAD}')
devscripts/utils.py

@@ -7,8 +7,8 @@ def read_file(fname):
         return f.read()


-def write_file(fname, content):
-    with open(fname, 'w', encoding='utf-8') as f:
+def write_file(fname, content, mode='w'):
+    with open(fname, mode, encoding='utf-8') as f:
         return f.write(content)
pyinst.py

@@ -12,9 +12,8 @@ from PyInstaller.__main__ import run as run_pyinstaller

 from devscripts.utils import read_version

-OS_NAME, MACHINE, ARCH = sys.platform, platform.machine(), platform.architecture()[0][:2]
-if MACHINE in ('x86_64', 'AMD64') or ('i' in MACHINE and '86' in MACHINE):
-    # NB: Windows x86 has MACHINE = AMD64 irrespective of bitness
+OS_NAME, MACHINE, ARCH = sys.platform, platform.machine().lower(), platform.architecture()[0][:2]
+if MACHINE in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
     MACHINE = 'x86' if ARCH == '32' else ''

@@ -63,7 +62,7 @@ def exe(onedir):
     name = '_'.join(filter(None, (
         'yt-dlp',
         {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME),
-        MACHINE
+        MACHINE,
     )))
     return name, ''.join(filter(None, (
         'dist/',
setup.py: 125 changes

@@ -36,36 +36,34 @@ def packages():


 def py2exe_params():
     import py2exe  # noqa: F401

     warnings.warn(
         'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
-        'The recommended way is to use "pyinst.py" to build using pyinstaller')
+        'It is recommended to run "pyinst.py" to build using pyinstaller instead')

     return {
         'console': [{
             'script': './yt_dlp/__main__.py',
             'dest_base': 'yt-dlp',
+            'icon_resources': [(1, 'devscripts/logo.ico')],
+        }],
+        'version_info': {
             'version': VERSION,
             'description': DESCRIPTION,
             'comments': LONG_DESCRIPTION.split('\n')[0],
             'product_name': 'yt-dlp',
             'product_version': VERSION,
-            'icon_resources': [(1, 'devscripts/logo.ico')],
-        }],
+        },
         'options': {
-            'py2exe': {
-                'bundle_files': 0,
-                'compressed': 1,
-                'optimize': 2,
-                'dist_dir': './dist',
-                'excludes': ['Crypto', 'Cryptodome'],  # py2exe cannot import Crypto
-                'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
-                # Modules that are only imported dynamically must be added here
-                'includes': ['yt_dlp.compat._legacy'],
-            }
+            'bundle_files': 0,
+            'compressed': 1,
+            'optimize': 2,
+            'dist_dir': './dist',
+            'excludes': ['Crypto', 'Cryptodome'],  # py2exe cannot import Crypto
+            'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
+            # Modules that are only imported dynamically must be added here
+            'includes': ['yt_dlp.compat._legacy'],
         },
-        'zipfile': None
+        'zipfile': None,
     }
@@ -113,41 +111,58 @@ class build_lazy_extractors(Command):
        subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])


params = py2exe_params() if sys.argv[1:2] == ['py2exe'] else build_params()
setup(
    name='yt-dlp',
    version=VERSION,
    maintainer='pukkandan',
    maintainer_email='pukkandan.ytdlp@gmail.com',
    description=DESCRIPTION,
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    url='https://github.com/yt-dlp/yt-dlp',
    packages=packages(),
    install_requires=REQUIREMENTS,
    python_requires='>=3.7',
    project_urls={
        'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
        'Source': 'https://github.com/yt-dlp/yt-dlp',
        'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues',
        'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators',
    },
    classifiers=[
        'Topic :: Multimedia :: Video',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: Implementation',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
        'License :: Public Domain',
        'Operating System :: OS Independent',
    ],
    cmdclass={'build_lazy_extractors': build_lazy_extractors},
    **params
)
def main():
    if sys.argv[1:2] == ['py2exe']:
        params = py2exe_params()
        try:
            from py2exe import freeze
        except ImportError:
            import py2exe  # noqa: F401
            warnings.warn('You are using an outdated version of py2exe. Support for this version will be removed in the future')
            params['console'][0].update(params.pop('version_info'))
            params['options'] = {'py2exe': params.pop('options')}
        else:
            return freeze(**params)
    else:
        params = build_params()

    setup(
        name='yt-dlp',
        version=VERSION,
        maintainer='pukkandan',
        maintainer_email='pukkandan.ytdlp@gmail.com',
        description=DESCRIPTION,
        long_description=LONG_DESCRIPTION,
        long_description_content_type='text/markdown',
        url='https://github.com/yt-dlp/yt-dlp',
        packages=packages(),
        install_requires=REQUIREMENTS,
        python_requires='>=3.7',
        project_urls={
            'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
            'Source': 'https://github.com/yt-dlp/yt-dlp',
            'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues',
            'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators',
        },
        classifiers=[
            'Topic :: Multimedia :: Video',
            'Development Status :: 5 - Production/Stable',
            'Environment :: Console',
            'Programming Language :: Python',
            'Programming Language :: Python :: 3.7',
            'Programming Language :: Python :: 3.8',
            'Programming Language :: Python :: 3.9',
            'Programming Language :: Python :: 3.10',
            'Programming Language :: Python :: 3.11',
            'Programming Language :: Python :: Implementation',
            'Programming Language :: Python :: Implementation :: CPython',
            'Programming Language :: Python :: Implementation :: PyPy',
            'License :: Public Domain',
            'Operating System :: OS Independent',
        ],
        cmdclass={'build_lazy_extractors': build_lazy_extractors},
        **params
    )


main()

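The refactor above targets the `freeze()` entry point of py2exe ≥ 0.11 while keeping a fallback for older releases. A minimal standalone sketch of that API, under the assumption of a recent py2exe (script name and option values hypothetical):

```python
from py2exe import freeze

freeze(
    console=[{'script': 'app.py', 'dest_base': 'app'}],       # hypothetical entry script
    options={'bundle_files': 0, 'compressed': 1, 'optimize': 2},
    zipfile=None,
)
```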
@@ -23,7 +23,7 @@
- **9now.com.au**
- **abc.net.au**
- **abc.net.au:iview**
- **abc.net.au:iview:showseries**
- **abc.net.au:iview:showseries**
- **abcnews**
- **abcnews:video**
- **abcotvs**: ABC Owned Television Stations
@@ -35,7 +35,7 @@
- **acast:channel**
- **AcFunBangumi**
- **AcFunVideo**
- **ADN**: [<abbr title="netrc machine"><em>animedigitalnetwork</em></abbr>] Anime Digital Network
- **ADN**: [<abbr title="netrc machine"><em>animationdigitalnetwork</em></abbr>] Animation Digital Network
- **AdobeConnect**
- **adobetv**
- **adobetv:channel**
@@ -46,6 +46,7 @@
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
- **aenetworks:collection**
- **aenetworks:show**
- **AeonCo**
- **afreecatv**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com
- **afreecatv:live**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com
- **afreecatv:user**
@@ -119,13 +120,12 @@
- **Bandcamp:album**
- **Bandcamp:user**
- **Bandcamp:weekly**
- **bangumi.bilibili.com**: BiliBili番剧
- **BannedVideo**
- **bbc**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC
- **bbc.co.uk**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC iPlayer
- **bbc.co.uk:article**: BBC articles
- **bbc.co.uk:iplayer:episodes**
- **bbc.co.uk:iplayer:group**
- **bbc.co.uk:iplayer:episodes**
- **bbc.co.uk:iplayer:group**
- **bbc.co.uk:playlist**
- **BBVTV**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
- **BBVTVLive**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
@@ -149,6 +149,8 @@
- **Bilibili category extractor**
- **BilibiliAudio**
- **BilibiliAudioAlbum**
- **BiliBiliBangumi**
- **BiliBiliBangumiMedia**
- **BiliBiliPlayer**
- **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
- **BilibiliSpaceAudio**
@@ -195,6 +197,7 @@
- **Camdemy**
- **CamdemyFolder**
- **CamModels**
- **Camsoda**
- **CamtasiaEmbed**
- **CamWithHer**
- **CanalAlpha**
@@ -218,7 +221,7 @@
- **cbssports:embed**
- **CCMA**
- **CCTV**: 央视网
- **CDA**
- **CDA**: [<abbr title="netrc machine"><em>cdapl</em></abbr>]
- **Cellebrite**
- **CeskaTelevize**
- **CGTN**
@@ -233,6 +236,7 @@
- **cielotv.it**
- **Cinchcast**
- **Cinemax**
- **CinetecaMilano**
- **CiscoLiveSearch**
- **CiscoLiveSession**
- **ciscowebex**: Cisco Webex
@@ -272,9 +276,7 @@
- **CrowdBunker**
- **CrowdBunkerChannel**
- **crunchyroll**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
- **crunchyroll:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
- **crunchyroll:playlist**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
- **crunchyroll:playlist:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
- **CSpan**: C-SPAN
- **CSpanCongress**
- **CtsNews**: 華視新聞
@@ -311,6 +313,8 @@
- **democracynow**
- **DestinationAmerica**
- **DetikEmbed**
- **DeuxM**
- **DeuxMNews**
- **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Digg**
- **DigitalConcertHall**: [<abbr title="netrc machine"><em>digitalconcerthall</em></abbr>] DigitalConcertHall extractor
@@ -328,7 +332,6 @@
- **DIYNetwork**
- **dlive:stream**
- **dlive:vod**
- **DoodStream**
- **Dotsub**
- **Douyin**
- **DouyuShow**
@@ -422,6 +425,7 @@
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
- **FoxNewsVideo**
- **FoxSports**
- **fptplay**: fptplay.vn
- **FranceCulture**
@@ -463,6 +467,8 @@
- **gem.cbc.ca**: [<abbr title="netrc machine"><em>cbcgem</em></abbr>]
- **gem.cbc.ca:live**
- **gem.cbc.ca:playlist**
- **Genius**
- **GeniusLyrics**
- **Gettr**
- **GettrStreaming**
- **Gfycat**
@@ -483,7 +489,7 @@
- **Golem**
- **goodgame:stream**
- **google:podcasts**
- **google:podcasts:feed**
- **google:podcasts:feed**
- **GoogleDrive**
- **GoogleDrive:Folder**
- **GoPlay**: [<abbr title="netrc machine"><em>goplay</em></abbr>]
@@ -518,6 +524,7 @@
- **HotNewHipHop**
- **hotstar**
- **hotstar:playlist**
- **hotstar:season**
- **hotstar:series**
- **Howcast**
- **HowStuffWorks**
@@ -618,7 +625,7 @@
- **kuwo:singer**: 酷我音乐 - 歌手
- **kuwo:song**: 酷我音乐
- **la7.it**
- **la7.it:pod:episode**
- **la7.it:pod:episode**
- **la7.it:podcast**
- **laola1tv**
- **laola1tv:embed**
@@ -652,9 +659,10 @@
- **LineLiveChannel**
- **LinkedIn**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
- **linkedin:learning**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
- **linkedin:learning:course**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
- **linkedin:learning:course**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
- **LinuxAcademy**: [<abbr title="netrc machine"><em>linuxacademy</em></abbr>]
- **Liputan6**
- **ListenNotes**
- **LiTV**
- **LiveJournal**
- **livestream**
@@ -673,7 +681,7 @@
- **MagentaMusik360**
- **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- **MainStreaming**: MainStreaming Player
- **MallTV**
- **mangomolo:live**
@@ -718,7 +726,7 @@
- **microsoftstream**: Microsoft Stream
- **mildom**: Record ongoing live by specific user in Mildom
- **mildom:clip**: Clip in Mildom
- **mildom:user:vod**: Download all VODs from specific user in Mildom
- **mildom:user:vod**: Download all VODs from specific user in Mildom
- **mildom:vod**: VOD in Mildom
- **minds**
- **minds:channel**
@@ -736,6 +744,7 @@
- **mixcloud:playlist**
- **mixcloud:user**
- **MLB**
- **MLBArticle**
- **MLBTV**: [<abbr title="netrc machine"><em>mlb</em></abbr>]
- **MLBVideo**
- **MLSSoccer**
@@ -753,6 +762,7 @@
- **MotherlessGroup**
- **Motorsport**: motorsport.com
- **MotorTrend**
- **MotorTrendOnDemand**
- **MovieClips**
- **MovieFap**
- **Moviepilot**
@@ -803,7 +813,7 @@
- **navernow**
- **NBA**
- **nba:watch**
- **nba:watch:collection**
- **nba:watch:collection**
- **NBAChannel**
- **NBAEmbed**
- **NBAWatchEmbed**
@@ -817,7 +827,7 @@
- **NBCStations**
- **ndr**: NDR.de - Norddeutscher Rundfunk
- **ndr:embed**
- **ndr:embed:base**
- **ndr:embed:base**
- **NDTV**
- **Nebula**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
- **nebula:channel**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
@@ -869,7 +879,7 @@
- **niconico:tag**: NicoNico video tag URLs
- **NiconicoUser**
- **nicovideo:search**: Nico video search; "nicosearch:" prefix
- **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix
- **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix
- **nicovideo:search_url**: Nico video search URLs
- **Nintendo**
- **Nitter**
@@ -881,6 +891,7 @@
- **NoodleMagazine**
- **Noovo**
- **Normalboots**
- **NOSNLArticle**
- **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed**
@@ -892,7 +903,7 @@
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **npo.nl:live**
- **npo.nl:radio**
- **npo.nl:radio:fragment**
- **npo.nl:radio:fragment**
- **Npr**
- **NRK**
- **NRKPlaylist**
@@ -915,6 +926,8 @@
- **ocw.mit.edu**
- **OdaTV**
- **Odnoklassniki**
- **OfTV**
- **OfTVPlaylist**
- **OktoberfestTV**
- **OlympicsReplay**
- **on24**: ON24
@@ -933,7 +946,7 @@
- **openrec:capture**
- **openrec:movie**
- **OraTV**
- **orf:fm4:story**: fm4.orf.at stories
- **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at
- **orf:radio**
- **orf:tvthek**: ORF TVthek
@@ -981,7 +994,7 @@
- **Pinterest**
- **PinterestCollection**
- **pixiv:sketch**
- **pixiv:sketch:user**
- **pixiv:sketch:user**
- **Pladform**
- **PlanetMarathi**
- **Platzi**: [<abbr title="netrc machine"><em>platzi</em></abbr>]
@@ -999,6 +1012,8 @@
- **pluralsight**: [<abbr title="netrc machine"><em>pluralsight</em></abbr>]
- **pluralsight:course**
- **PlutoTV**
- **PodbayFM**
- **PodbayFMChannel**
- **Podchaser**
- **podomatic**
- **Pokemon**
@@ -1010,7 +1025,7 @@
- **polskieradio:kierowcow**
- **polskieradio:player**
- **polskieradio:podcast**
- **polskieradio:podcast:list**
- **polskieradio:podcast:list**
- **PolskieRadioCategory**
- **Popcorntimes**
- **PopcornTV**
@@ -1042,6 +1057,7 @@
- **puhutv:serie**
- **Puls4**
- **Pyvideo**
- **QingTing**
- **qqmusic**: QQ音乐
- **qqmusic:album**: QQ音乐 - 专辑
- **qqmusic:playlist**: QQ音乐 - 歌单
@@ -1122,7 +1138,7 @@
- **rtl.nl**: rtl.nl and rtlxl.nl
- **rtl2**
- **rtl2:you**
- **rtl2:you:series**
- **rtl2:you:series**
- **RTLLuLive**
- **RTLLuRadio**
- **RTNews**
@@ -1164,12 +1180,14 @@
- **SaltTVLive**: [<abbr title="netrc machine"><em>salttv</em></abbr>]
- **SaltTVRecordings**: [<abbr title="netrc machine"><em>salttv</em></abbr>]
- **SampleFocus**
- **Sangiin**: 参議院インターネット審議中継 (archive)
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
- **schooltv**
- **ScienceChannel**
- **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix
- **Screen9**
- **Screencast**
- **ScreencastOMatic**
- **ScrippsNetworks**
@@ -1191,6 +1209,9 @@
- **ShareVideosEmbed**
- **ShemarooMe**
- **ShowRoomLive**
- **ShugiinItvLive**: 衆議院インターネット審議中継
- **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継)
- **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ)
- **simplecast**
- **simplecast:episode**
- **simplecast:podcast**
@@ -1198,16 +1219,15 @@
- **Skeb**
- **sky.it**
- **sky:news**
- **sky:news:story**
- **sky:news:story**
- **sky:sports**
- **sky:sports:news**
- **skyacademy.it**
- **sky:sports:news**
- **SkylineWebcams**
- **skynewsarabia:article**
- **skynewsarabia:video**
- **SkyNewsAU**
- **Slideshare**
- **SlidesLive**
- **SlidesLive**: (**Currently broken**)
- **Slutload**
- **Smotrim**
- **Snotr**
@@ -1277,6 +1297,7 @@
- **SVTPage**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SVTSeries**
- **SwearnetEpisode**
- **SWRMediathek**
- **Syfy**
- **SYVDK**
@@ -1289,7 +1310,7 @@
- **Teachable**: [<abbr title="netrc machine"><em>teachable</em></abbr>]
- **TeachableCourse**: [<abbr title="netrc machine"><em>teachable</em></abbr>]
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
- **TeamTreeHouse**: [<abbr title="netrc machine"><em>teamtreehouse</em></abbr>]
@@ -1347,6 +1368,8 @@
- **toggo**
- **Tokentube**
- **Tokentube:channel**
- **tokfm:audition**
- **tokfm:podcast**
- **ToonGoggles**
- **tou.tv**: [<abbr title="netrc machine"><em>toutv</em></abbr>]
- **Toypics**: Toypics video
@@ -1378,7 +1401,6 @@
- **Turbo**
- **tv.dfb.de**
- **TV2**
- **TV24UAGenericPassthrough**
- **TV2Article**
- **TV2DK**
- **TV2DKBornholmPlay**
@@ -1411,8 +1433,9 @@
- **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**
- **tvp:stream**
- **tvp:vod**
- **tvp:vod:series**
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
@@ -1431,6 +1454,7 @@
- **twitter:broadcast**
- **twitter:card**
- **twitter:shortener**
- **twitter:spaces**
- **udemy**: [<abbr title="netrc machine"><em>udemy</em></abbr>]
- **udemy:course**: [<abbr title="netrc machine"><em>udemy</em></abbr>]
- **UDNEmbed**: 聯合影音
@@ -1584,6 +1608,7 @@
- **WistiaChannel**
- **WistiaPlaylist**
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **wordpress:mb.miniAudioPlayer**
- **wordpress:playlist**
- **WorldStarHipHop**
- **wppilot**
@@ -1591,6 +1616,8 @@
- **WSJ**: Wall Street Journal
- **WSJArticle**
- **WWE**
- **wyborcza:video**
- **WyborczaPodcast**
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
@@ -1614,12 +1641,12 @@
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- **yahoo:gyao**
- **yahoo:gyao:player**
- **yahoo:gyao:player**
- **yahoo:japannews**: Yahoo! Japan News
- **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
- **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
- **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
- **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
- **YandexVideo**
@@ -1627,6 +1654,7 @@
- **YapFiles**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
- **YleAreena**
- **Ynet**
- **YouJizz**
- **youku**: 优酷
@@ -1641,14 +1669,14 @@
- **youtube:clip**
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
- **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
- **youtube:music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs
- **youtube:music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs
- **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies)
- **youtube:playlist**: YouTube playlists
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
- **youtube:search**: YouTube search; "ytsearch:" prefix
- **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
- **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
- **youtube:search_url**: YouTube search URLs with sorting and filter support
- **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
- **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
- **youtube:stories**: YouTube channel stories; "ytstories:" prefix
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
- **youtube:tab**: YouTube Tabs
@@ -1665,6 +1693,7 @@
- **ZDFChannel**
- **Zee5**: [<abbr title="netrc machine"><em>zee5</em></abbr>]
- **zee5:series**
- **ZeeNews**
- **ZenYandex**
- **ZenYandexChannel**
- **Zhihu**

@@ -222,6 +222,10 @@ def sanitize_got_info_dict(got_dict):
    if test_info_dict.get('display_id') == test_info_dict.get('id'):
        test_info_dict.pop('display_id')

    # Check url for flat entries
    if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'):
        test_info_dict['url'] = got_dict['url']

    return test_info_dict


@@ -235,8 +239,9 @@ def expect_info_dict(self, got_dict, expected_dict):
    for key in mandatory_fields:
        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
    # Check for mandatory fields that are automatically set by YoutubeDL
    for key in ['webpage_url', 'extractor', 'extractor_key']:
        self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
    if got_dict.get('_type', 'video') == 'video':
        for key in ['webpage_url', 'extractor', 'extractor_key']:
            self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)

    test_info_dict = sanitize_got_info_dict(got_dict)

@@ -249,19 +254,16 @@ def expect_info_dict(self, got_dict, expected_dict):
            return v.__name__
        else:
            return repr(v)
    info_dict_str = ''
    if len(missing_keys) != len(expected_dict):
        info_dict_str += ''.join(
            f' {_repr(k)}: {_repr(v)},\n'
            for k, v in test_info_dict.items() if k not in missing_keys)

        if info_dict_str:
            info_dict_str += '\n'
    info_dict_str = ''.join(
        f' {_repr(k)}: {_repr(v)},\n'
        for k, v in test_info_dict.items() if k not in missing_keys)
    if info_dict_str:
        info_dict_str += '\n'
    info_dict_str += ''.join(
        f' {_repr(k)}: {_repr(test_info_dict[k])},\n'
        for k in missing_keys)
    write_string(
        '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
    info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n'
    write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr)
    self.assertFalse(
        missing_keys,
        'Missing keys in test definition: %s' % (

@@ -11,7 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import base64

from yt_dlp.aes import (
    BLOCK_SIZE_BYTES,
    aes_cbc_decrypt,
    aes_cbc_decrypt_bytes,
    aes_cbc_encrypt,
@@ -103,8 +102,7 @@ class TestAES(unittest.TestCase):

    def test_ecb_encrypt(self):
        data = bytes_to_intlist(self.secret_msg)
        data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES)
        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv))
        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
        self.assertEqual(
            encrypted,
            b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')

@@ -277,9 +277,24 @@ class TestLenientSimpleCookie(unittest.TestCase):
            "a=b; invalid; Version=1; c=d",
            {"a": "b", "c": "d"},
        ),
        (
            "Reset morsel after invalid to not capture attributes",
            "a=b; $invalid; $Version=1; c=d",
            {"a": "b", "c": "d"},
        ),
        (
            "Continue after non-flag attribute without value",
            "a=b; path; Version=1; c=d",
            {"a": "b", "c": "d"},
        ),
        (
            "Allow cookie attributes with `$` prefix",
            'Customer="WILE_E_COYOTE"; $Version=1; $Secure; $Path=/acme',
            {"Customer": ("WILE_E_COYOTE", {"version": "1", "secure": True, "path": "/acme"})},
        ),
        (
            "Invalid Morsel keys should not result in an error",
            "Key=Value; [Invalid]=Value; Another=Value",
            {"Key": "Value", "Another": "Value"},
        ),
    )

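Taken together, these cases pin down the lenient parsing behaviour; in isolation it looks like this (a sketch):

```python
from yt_dlp.cookies import LenientSimpleCookie

# An entry the stdlib SimpleCookie would reject no longer poisons its neighbours
cookie = LenientSimpleCookie('a=b; invalid; Version=1; c=d')
assert {name: morsel.value for name, morsel in cookie.items()} == {'a': 'b', 'c': 'd'}
```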
@@ -106,7 +106,7 @@ def generator(test_case, tname):
        params = tc.get('params', {})
        if not info_dict.get('id'):
            raise Exception(f'Test {tname} definition incorrect - "id" key is not present')
        elif not info_dict.get('ext'):
        elif not info_dict.get('ext') and info_dict.get('_type', 'video') == 'video':
            if params.get('skip_download') and params.get('ignore_no_formats_error'):
                continue
            raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file')
@@ -122,7 +122,8 @@ def generator(test_case, tname):
            params['outtmpl'] = tname + '_' + params['outtmpl']
        if is_playlist and 'playlist' not in test_case:
            params.setdefault('extract_flat', 'in_playlist')
            params.setdefault('playlistend', test_case.get('playlist_mincount'))
            params.setdefault('playlistend', test_case.get(
                'playlist_mincount', test_case.get('playlist_count', -2) + 1))
            params.setdefault('skip_download', True)

        ydl = YoutubeDL(params, auto_init=False)

@@ -212,6 +213,8 @@ def generator(test_case, tname):
                tc_res_dict = res_dict['entries'][tc_num]
                # First, check test cases' data against extracted data alone
                expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
                if tc_res_dict.get('_type', 'video') != 'video':
                    continue
                # Now, check downloaded file consistency
                tc_filename = get_tc_filename(tc)
                if not test_case.get('params', {}).get('skip_download', False):

@@ -392,6 +392,11 @@ class TestJSInterpreter(unittest.TestCase):
        ''')
        self.assertEqual(jsi.call_function('x').pattern, r',][}",],()}(\[)')

        jsi = JSInterpreter(R'''
        function x() { let a=[/[)\\]/]; return a[0]; }
        ''')
        self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')

    def test_char_code_at(self):
        jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
        self.assertEqual(jsi.call_function('x', 0), 116)

@@ -16,6 +16,7 @@ from yt_dlp.postprocessor import (
    MetadataFromFieldPP,
    MetadataParserPP,
    ModifyChaptersPP,
    SponsorBlockPP,
)


@@ -76,11 +77,15 @@ class TestModifyChaptersPP(unittest.TestCase):
        self._pp = ModifyChaptersPP(YoutubeDL())

    @staticmethod
    def _sponsor_chapter(start, end, cat, remove=False):
        c = {'start_time': start, 'end_time': end, '_categories': [(cat, start, end)]}
        if remove:
            c['remove'] = True
        return c
    def _sponsor_chapter(start, end, cat, remove=False, title=None):
        if title is None:
            title = SponsorBlockPP.CATEGORIES[cat]
        return {
            'start_time': start,
            'end_time': end,
            '_categories': [(cat, start, end, title)],
            **({'remove': True} if remove else {}),
        }

    @staticmethod
    def _chapter(start, end, title=None, remove=False):
@@ -130,6 +135,19 @@ class TestModifyChaptersPP(unittest.TestCase):
            'c', '[SponsorBlock]: Filler Tangent', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_SponsorBlockChapters(self):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 20, 'chapter', title='sb c1'),
            self._sponsor_chapter(15, 16, 'chapter', title='sb c2'),
            self._sponsor_chapter(30, 40, 'preview'),
            self._sponsor_chapter(50, 60, 'filler')]
        expected = self._chapters(
            [10, 15, 16, 20, 30, 40, 50, 60, 70],
            ['c', '[SponsorBlock]: sb c1', '[SponsorBlock]: sb c1, sb c2', '[SponsorBlock]: sb c1',
             'c', '[SponsorBlock]: Preview/Recap',
             'c', '[SponsorBlock]: Filler Tangent', 'c'])
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])

    def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self):
        chapters = self._chapters([120], ['c']) + [
            self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'),
@@ -173,7 +191,7 @@ class TestModifyChaptersPP(unittest.TestCase):
        self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts)

    def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self):
        cuts = [self._sponsor_chapter(20, 50, 'selpromo', remove=True)]
        cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)]
        chapters = self._chapters([60], ['c']) + [
            self._sponsor_chapter(10, 20, 'intro'),
            self._sponsor_chapter(30, 40, 'sponsor'),
@@ -199,7 +217,7 @@ class TestModifyChaptersPP(unittest.TestCase):
            self._sponsor_chapter(10, 20, 'sponsor'),
            self._sponsor_chapter(20, 30, 'interaction', remove=True),
            self._chapter(30, 40, remove=True),
            self._sponsor_chapter(40, 50, 'selpromo', remove=True),
            self._sponsor_chapter(40, 50, 'selfpromo', remove=True),
            self._sponsor_chapter(50, 60, 'interaction')]
        expected = self._chapters([10, 20, 30, 40],
                                  ['c', '[SponsorBlock]: Sponsor',
@@ -282,7 +300,7 @@ class TestModifyChaptersPP(unittest.TestCase):
        chapters = self._chapters([70], ['c']) + [
            self._sponsor_chapter(10, 30, 'sponsor'),
            self._sponsor_chapter(20, 50, 'interaction'),
            self._sponsor_chapter(30, 50, 'selpromo', remove=True),
            self._sponsor_chapter(30, 50, 'selfpromo', remove=True),
            self._sponsor_chapter(40, 60, 'sponsor'),
            self._sponsor_chapter(50, 60, 'interaction')]
        expected = self._chapters(

@@ -2,6 +2,7 @@

# Allow direct execution
import os
import re
import sys
import unittest

@@ -1099,6 +1100,12 @@ class TestUtil(unittest.TestCase):
        on = js_to_json('[1,//{},\n2]')
        self.assertEqual(json.loads(on), [1, 2])

        on = js_to_json(R'"\^\$\#"')
        self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')

        on = js_to_json('\'"\\""\'')
        self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')

    def test_js_to_json_malformed(self):
        self.assertEqual(js_to_json('42a1'), '42"a1"')
        self.assertEqual(js_to_json('42a-1'), '42"a"-1')
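The new assertions boil down to two behaviours of `js_to_json`, shown here directly (a sketch):

```python
import json

from yt_dlp.utils import js_to_json

assert json.loads(js_to_json(R'"\^\$\#"')) == R'^$#'     # unnecessary escapes are stripped
assert json.loads(js_to_json('[1,//{},\n2]')) == [1, 2]  # JS line comments are dropped
```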
@@ -1678,6 +1685,9 @@ Line 1
        self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), [])
        self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), [])

        self.assertEqual(list(get_elements_text_and_html_by_attribute(
            'class', 'foo', '<a class="foo">nice</a><span class="foo">nice</span>', tag='a')), [('nice', '<a class="foo">nice</a>')])

        GET_ELEMENT_BY_TAG_TEST_STRING = '''
        random text lorem ipsum</p>
        <div>
@@ -1890,6 +1900,7 @@ Line 1
                {'index': 2},
                {'index': 3},
            ),
            'dict': {},
        }

        # Test base functionality
@@ -1926,11 +1937,15 @@ Line 1

        # Test alternative paths
        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
                         msg='multiple `path_list` should be treated as alternative paths')
                         msg='multiple `paths` should be treated as alternative paths')
        self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
                         msg='alternatives should exit early')
        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
                         msg='alternatives should return `default` if exhausted')
        self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100,
                         msg='alternatives should track their own branching return')
        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']),
                         msg='alternatives on empty objects should search further')

        # Test branch and path nesting
        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
@@ -1963,8 +1978,16 @@ Line 1
        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
                         {0: ['https://www.example.com/1', 'https://www.example.com/0']},
                         msg='tripple nesting in dict path should be treated as branches')
        self.assertEqual(traverse_obj({}, {0: 1}, default=...), {0: ...},
                         msg='do not remove `None` values when dict key')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
                         msg='remove `None` values when dict key')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...},
                         msg='do not remove `None` values if `default`')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
                         msg='do not remove empty values when dict key')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: {}},
                         msg='do not remove empty values when dict key and a default')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {0: []},
                         msg='if branch in dict key not successful, return `[]`')

        # Testing default parameter behavior
        _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
@@ -1981,7 +2004,13 @@ Line 1
        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
                         msg='`IndexError` should result in `default`')
        self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
                         msg='if branched but not successfull return `default`, not `[]`')
                         msg='if branched but not successful return `default` if defined, not `[]`')
        self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None,
                         msg='if branched but not successful return `default` even if `default` is `None`')
        self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [],
                         msg='if branched but not successful return `[]`, not `default`')
        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [],
                         msg='if branched but object is empty return `[]`, not `default`')

        # Testing expected_type behavior
        _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
@@ -2061,6 +2090,25 @@ Line 1
        with self.assertRaises(TypeError, msg='too many params should result in error'):
            traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)

        # Test re.Match as input obj
        mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
        self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
                         msg='`...` on a `re.Match` should give its `groups()`')
        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
                         msg='function on a `re.Match` should give groupno, value starting at 0')
        self.assertEqual(traverse_obj(mobj, 'group'), '3',
                         msg='str key on a `re.Match` should give group with that name')
        self.assertEqual(traverse_obj(mobj, 2), '3',
                         msg='int key on a `re.Match` should give group with that name')
        self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
                         msg='str key on a `re.Match` should respect casesense')
        self.assertEqual(traverse_obj(mobj, 'fail'), None,
                         msg='failing str key on a `re.Match` should return `default`')
        self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
                         msg='failing str key on a `re.Match` should return `default`')
        self.assertEqual(traverse_obj(mobj, 8), None,
                         msg='failing int key on a `re.Match` should return `default`')


if __name__ == '__main__':
    unittest.main()

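A condensed sketch of the `traverse_obj` semantics these tests exercise, including the newly added `re.Match` support:

```python
import re

from yt_dlp.utils import traverse_obj

data = {'urls': [{'url': 'https://www.example.com/0'}]}
assert traverse_obj(data, ('urls', 0, 'url')) == 'https://www.example.com/0'
assert traverse_obj(data, 'missing', default='fallback') == 'fallback'

mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
assert traverse_obj(mobj, 'group') == '3'  # named group via str key
assert traverse_obj(mobj, 2) == '3'        # same group via its number
```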
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import YoutubeIE, YoutubeTabIE
from yt_dlp.utils import ExtractorError


@is_download_test
@@ -53,6 +54,18 @@ class TestYoutubeLists(unittest.TestCase):
        self.assertEqual(video['duration'], 10)
        self.assertEqual(video['uploader'], 'Philipp Hagemeister')

    def test_youtube_channel_no_uploads(self):
        dl = FakeYDL()
        dl.params['extract_flat'] = True
        ie = YoutubeTabIE(dl)
        # no uploads
        with self.assertRaisesRegex(ExtractorError, r'no uploads'):
            ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA')

        # no uploads and no UCID given
        with self.assertRaisesRegex(ExtractorError, r'no uploads'):
            ie.extract('https://www.youtube.com/news')


if __name__ == '__main__':
    unittest.main()

@@ -130,6 +130,10 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
        'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
    ),
    (
        'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
        'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
    ),
]

@@ -548,7 +548,7 @@ class YoutubeDL:
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
@@ -616,46 +616,6 @@ class YoutubeDL:
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
@@ -676,9 +636,57 @@ class YoutubeDL:
            else:
                raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
@@ -1249,7 +1257,7 @@ class YoutubeDL:
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii=False), str_fmt
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
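The `ensure_ascii` change gives the `j` (JSON) output-template conversion a `+` flag: plain `%(title)j` now escapes non-ASCII, while `%(title)+j` keeps it verbatim. The underlying difference, in isolation (a sketch):

```python
import json

title = 'süß'
print(json.dumps(title, ensure_ascii=True))   # "s\u00fc\u00df"  (no `+` flag)
print(json.dumps(title, ensure_ascii=False))  # "süß"            (with `+` flag)
```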
@@ -1621,6 +1629,7 @@ class YoutubeDL:
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
@@ -2379,10 +2388,9 @@ class YoutubeDL:
        else:
            info_dict['thumbnails'] = thumbnails

    def _fill_common_fields(self, info_dict, is_video=True):
    def _fill_common_fields(self, info_dict, final=True):
        # TODO: move sanitization here
        if is_video:
            # playlists are allowed to lack "title"
        if final:
            title = info_dict.get('title', NO_DEFAULT)
            if title is NO_DEFAULT:
                raise ExtractorError('Missing "title" field in extractor result',
@@ -2432,7 +2440,7 @@ class YoutubeDL:
        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    def _raise_pending_errors(self, info):
@@ -2525,11 +2533,7 @@ class YoutubeDL:
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']
        formats = self._get_formats(info_dict)

        # or None ensures --clean-infojson removes it
        info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
@@ -2644,10 +2648,9 @@ class YoutubeDL:
        info_dict, _ = self.pre_process(info_dict, 'after_filter')

        # The pre-processors may have modified the formats
        formats = info_dict.get('formats', [info_dict])
        formats = self._get_formats(info_dict)

        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        list_only = self.params.get('simulate') == 'list_only'
        interactive_format_selection = not list_only and self.format_selector == '-'
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
@@ -2724,7 +2727,8 @@ class YoutubeDL:
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time if end_time < offset + duration else None,
                    # duration may not be accurate. So allow deviations <1sec
                    'section_end': end_time if end_time <= offset + duration + 1 else None,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
@@ -3571,11 +3575,17 @@ class YoutubeDL:
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res

    def render_formats_table(self, info_dict):
        if not info_dict.get('formats') and not info_dict.get('url'):
            return None
    def _get_formats(self, info_dict):
        if info_dict.get('formats') is None:
            if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
                return [info_dict]
            return []
        return info_dict['formats']

        formats = info_dict.get('formats', [info_dict])
    def render_formats_table(self, info_dict):
        formats = self._get_formats(info_dict)
        if not formats:
            return
        if not self.params.get('listformats_table', True) is not False:
            table = [
                [
@@ -3583,7 +3593,7 @@ class YoutubeDL:
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
                ] for f in formats if (f.get('preference') or 0) >= -1000]
            return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    def simplified_codec(f, field):
@@ -3725,6 +3735,10 @@ class YoutubeDL:
            '' if source == 'unknown' else f'({source})',
            '' if _IN_CLI else 'API',
            delim=' '))

        if not _IN_CLI:
            write_debug(f'params: {self.params}')

        if not _LAZY_LOADER:
            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                write_debug('Lazy loading extractors is forcibly disabled')

@@ -962,6 +962,8 @@ def _real_main(argv=None):


def main(argv=None):
    global _IN_CLI
    _IN_CLI = True
    try:
        _exit(*variadic(_real_main(argv)))
    except DownloadError:

@@ -5,7 +5,7 @@

import sys

if __package__ is None and not hasattr(sys, 'frozen'):
if __package__ is None and not getattr(sys, 'frozen', False):
    # direct call of __main__.py
    import os.path
    path = os.path.realpath(os.path.abspath(__file__))
@@ -14,5 +14,4 @@ if __package__ is None and not hasattr(sys, 'frozen'):
import yt_dlp

if __name__ == '__main__':
    yt_dlp._IN_CLI = True
    yt_dlp.main()
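The `hasattr` → `getattr` change matters because frozen builds (PyInstaller, py2exe) set `sys.frozen`, while `getattr(sys, 'frozen', False)` additionally treats an explicitly falsy value as "not frozen". The idiom in isolation (a sketch):

```python
import sys

# Truthy only inside a frozen executable; absent (-> False) in a plain interpreter
if getattr(sys, 'frozen', False):
    print('running from a frozen bundle')
else:
    print('running from a plain interpreter')
```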
@@ -28,11 +28,23 @@ def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
    return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))


BLOCK_SIZE_BYTES = 16


def unpad_pkcs7(data):
    return data[:-compat_ord(data[-1])]


BLOCK_SIZE_BYTES = 16
def pkcs7_padding(data):
    """
    PKCS#7 padding

    @param {int[]} data cleartext
    @returns {int[]} padding data
    """

    remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
    return data + [remaining_length] * remaining_length


def pad_block(block, padding_mode):
@@ -64,7 +76,7 @@ def pad_block(block, padding_mode):

def aes_ecb_encrypt(data, key, iv=None):
    """
    Encrypt with aes in ECB mode
    Encrypt with aes in ECB mode. Using PKCS#7 padding

    @param {int[]} data cleartext
    @param {int[]} key 16/24/32-Byte cipher key
@@ -77,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None):
    encrypted_data = []
    for i in range(block_count):
        block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
        encrypted_data += aes_encrypt(block, expanded_key)
    encrypted_data = encrypted_data[:len(data)]
        encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)

    return encrypted_data

@@ -551,5 +562,6 @@ __all__ = [

    'key_expansion',
    'pad_block',
    'pkcs7_padding',
    'unpad_pkcs7',
]
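A quick check of the new `pkcs7_padding` helper that `aes_ecb_encrypt` now relies on (a sketch):

```python
from yt_dlp.aes import BLOCK_SIZE_BYTES, pkcs7_padding

data = list(b'hello')        # 5 bytes -> 11 bytes of padding needed
padded = pkcs7_padding(data)
assert len(padded) % BLOCK_SIZE_BYTES == 0
assert padded[-1] == 11      # PKCS#7: each pad byte equals the pad length
```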
@@ -14,7 +14,7 @@ passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling
class compat_HTMLParseError(Exception):
class compat_HTMLParseError(ValueError):
    pass


@@ -48,6 +48,7 @@ def compat_setenv(key, value, env=os.environ):


compat_basestring = str
compat_casefold = str.casefold
compat_chr = chr
compat_collections_abc = collections.abc
compat_cookiejar = http.cookiejar

30 yt_dlp/compat/shutil.py (new file)
@@ -0,0 +1,30 @@
# flake8: noqa: F405
from shutil import *  # noqa: F403

from .compat_utils import passthrough_module

passthrough_module(__name__, 'shutil')
del passthrough_module


import sys

if sys.platform.startswith('freebsd'):
    import errno
    import os
    import shutil

    # Workaround for PermissionError when using restricted ACL mode on FreeBSD
    def copy2(src, dst, *args, **kwargs):
        if os.path.isdir(dst):
            dst = os.path.join(dst, os.path.basename(src))
        shutil.copyfile(src, dst, *args, **kwargs)
        try:
            shutil.copystat(src, dst, *args, **kwargs)
        except PermissionError as e:
            if e.errno != getattr(errno, 'EPERM', None):
                raise
        return dst

    def move(*args, copy_function=copy2, **kwargs):
        return shutil.move(*args, copy_function=copy_function, **kwargs)
@@ -999,8 +999,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    _LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    _RESERVED = {
        "expires",
@@ -1046,25 +1047,17 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
            return super().load(data)

        morsel = None
        index = 0
        length = len(data)

        while 0 <= index < length:
            match = self._COOKIE_PATTERN.search(data, index)
            if not match:
                break

            index = match.end(0)
            if match.group("bad"):
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group('bad'):
                morsel = None
                continue

            key, value = match.group("key", "val")
            key, value = match.group('key', 'val')

            if key[0] == "$":
                if morsel is not None:
                    morsel[key[1:]] = True
                continue
            is_attribute = False
            if key.startswith('$'):
                key = key[1:]
                is_attribute = True

            lower_key = key.lower()
            if lower_key in self._RESERVED:
@@ -1081,6 +1074,9 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):

                morsel[key] = value

            elif is_attribute:
                morsel = None

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)

@@ -333,7 +333,7 @@ class FileDownloader:
                return tmpl
            return default

        _formats_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
        _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'

        if s['status'] == 'finished':
            if self.params.get('noprogress'):
@@ -342,7 +342,7 @@ class FileDownloader:
            s.update({
                'speed': speed,
                '_speed_str': self.format_speed(speed).strip(),
                '_total_bytes_str': _formats_bytes('total_bytes'),
                '_total_bytes_str': _format_bytes('total_bytes'),
                '_elapsed_str': self.format_seconds(s.get('elapsed')),
                '_percent_str': self.format_percent(100),
            })
@@ -363,9 +363,9 @@ class FileDownloader:
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
                lambda: s['downloaded_bytes'] == 0 and 0)),
            '_total_bytes_str': _formats_bytes('total_bytes'),
            '_total_bytes_estimate_str': _formats_bytes('total_bytes_estimate'),
            '_downloaded_bytes_str': _formats_bytes('downloaded_bytes'),
            '_total_bytes_str': _format_bytes('total_bytes'),
            '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
            '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
        })

@@ -51,7 +51,7 @@ class DashSegmentsFD(FragmentFD):

            args.append([ctx, fragments_to_download, fmt])

        return self.download_and_append_fragments_multiple(*args)
        return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0)

    def _resolve_fragments(self, fragments, ctx):
        fragments = fragments(ctx) if callable(fragments) else fragments

@@ -424,6 +424,4 @@ class F4mFD(FragmentFD):
            msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
            self.report_warning(msg)

        self._finish_frag_download(ctx, info_dict)

        return True
        return self._finish_frag_download(ctx, info_dict)

@@ -295,16 +295,23 @@ class FragmentFD(FileDownloader):
            self.try_remove(ytdl_filename)
        elapsed = time.time() - ctx['started']

        if ctx['tmpfilename'] == '-':
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
        to_file = ctx['tmpfilename'] != '-'
        if to_file:
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
        else:
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']

        if not downloaded_bytes:
            if to_file:
                self.try_remove(ctx['tmpfilename'])
            self.report_error('The downloaded file is empty')
            return False
        elif to_file:
            self.try_rename(ctx['tmpfilename'], ctx['filename'])
            if self.params.get('updatetime', True):
                filetime = ctx.get('fragment_filetime')
                if filetime:
                    with contextlib.suppress(Exception):
                        os.utime(ctx['filename'], (time.time(), filetime))
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
            filetime = ctx.get('fragment_filetime')
            if self.params.get('updatetime', True) and filetime:
                with contextlib.suppress(Exception):
                    os.utime(ctx['filename'], (time.time(), filetime))

        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
@@ -316,6 +323,7 @@ class FragmentFD(FileDownloader):
            'max_progress': ctx.get('max_progress'),
            'progress_idx': ctx.get('progress_idx'),
        }, info_dict)
        return True

    def _prepare_external_frag_download(self, ctx):
        if 'live' not in ctx:
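The reworked branch measures the temporary file before renaming it, which is what makes the empty-download check possible. A self-contained sketch of just that control flow (function and argument names invented for illustration):

import os

def finish(tmpfilename, complete_frags_downloaded_bytes):
    # '-' means the fragments were streamed to stdout, so only the
    # in-memory byte counter is available
    to_file = tmpfilename != '-'
    if to_file:
        downloaded_bytes = os.path.getsize(tmpfilename)
    else:
        downloaded_bytes = complete_frags_downloaded_bytes
    # zero bytes -> the real method removes the tmp file, reports
    # 'The downloaded file is empty' and returns False
    return bool(downloaded_bytes)
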
@@ -362,7 +370,7 @@ class FragmentFD(FileDownloader):

        return decrypt_fragment

    def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None):
    def download_and_append_fragments_multiple(self, *args, **kwargs):
        '''
        @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
        all args must be either tuple or list
@@ -370,7 +378,7 @@ class FragmentFD(FileDownloader):
        interrupt_trigger = [True]
        max_progress = len(args)
        if max_progress == 1:
            return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
            return self.download_and_append_fragments(*args[0], **kwargs)
        max_workers = self.params.get('concurrent_fragment_downloads', 1)
        if max_progress > 1:
            self._prepare_multiline_status(max_progress)
@@ -380,8 +388,7 @@ class FragmentFD(FileDownloader):
            ctx['max_progress'] = max_progress
            ctx['progress_idx'] = idx
            return self.download_and_append_fragments(
                ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func,
                tpe=tpe, interrupt_trigger=interrupt_trigger)
                ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger)

        class FTPE(concurrent.futures.ThreadPoolExecutor):
            # has to stop this or it's going to wait on the worker thread itself
@@ -428,17 +435,12 @@ class FragmentFD(FileDownloader):
        return result

    def download_and_append_fragments(
            self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None,
            tpe=None, interrupt_trigger=None):
        if not interrupt_trigger:
            interrupt_trigger = (True, )
            self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False),
            pack_func=(lambda content, idx: content), finish_func=None,
            tpe=None, interrupt_trigger=(True, )):

        is_fatal = (
            ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0))
            if self.params.get('skip_unavailable_fragments', True) else (lambda _: True))

        if not pack_func:
            pack_func = lambda frag_content, _: frag_content
        if not self.params.get('skip_unavailable_fragments', True):
            is_fatal = lambda _: True

        def download_fragment(fragment, ctx):
            if not interrupt_trigger[0]:
@@ -527,5 +529,4 @@ class FragmentFD(FileDownloader):
        if finish_func is not None:
            ctx['dest_stream'].write(finish_func())
            ctx['dest_stream'].flush()
        self._finish_frag_download(ctx, info_dict)
        return True
        return self._finish_frag_download(ctx, info_dict)

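Taken together, these hunks move the fatality policy out of download_and_append_fragments and let callers pass is_fatal through the **kwargs of download_and_append_fragments_multiple. A hedged usage sketch (fd stands in for a FragmentFD instance; ctx, fragments and info_dict as prepared elsewhere):

# Only a failure of fragment 0 (e.g. the init fragment) aborts the
# download; later missing fragments are skipped as before
fd.download_and_append_fragments_multiple(
    (ctx, fragments, info_dict),
    is_fatal=lambda idx: idx == 0)
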
@@ -280,5 +280,4 @@ class IsmFD(FragmentFD):
                    return False
                self.report_skip_fragment(frag_index)

        self._finish_frag_download(ctx, info_dict)
        return True
        return self._finish_frag_download(ctx, info_dict)

@@ -186,5 +186,4 @@ body > figure > img {

        ctx['dest_stream'].write(
            b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
        self._finish_frag_download(ctx, info_dict)
        return True
        return self._finish_frag_download(ctx, info_dict)

@@ -191,8 +191,7 @@ class YoutubeLiveChatFD(FragmentFD):
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True
        return self._finish_frag_download(ctx, info_dict)

    @staticmethod
    def parse_live_timestamp(action):

@@ -65,11 +65,18 @@ from .aenetworks import (
    HistoryPlayerIE,
    BiographyIE,
)
from .aeonco import AeonCoIE
from .afreecatv import (
    AfreecaTVIE,
    AfreecaTVLiveIE,
    AfreecaTVUserIE,
)
from .agora import (
    TokFMAuditionIE,
    TokFMPodcastIE,
    WyborczaPodcastIE,
    WyborczaVideoIE,
)
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
@@ -186,9 +193,10 @@ from .bigo import BigoIE
from .bild import BildIE
from .bilibili import (
    BiliBiliIE,
    BiliBiliBangumiIE,
    BiliBiliBangumiMediaIE,
    BiliBiliSearchIE,
    BilibiliCategoryIE,
    BiliBiliBangumiIE,
    BilibiliAudioIE,
    BilibiliAudioAlbumIE,
    BiliBiliPlayerIE,
@@ -247,6 +255,7 @@ from .camdemy import (
    CamdemyFolderIE
)
from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE
from .canalalpha import CanalAlphaIE
@@ -310,6 +319,7 @@ from .chirbit import (
)
from .cinchcast import CinchcastIE
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .ciscolive import (
    CiscoLiveSessionIE,
    CiscoLiveSearchIE,
@@ -364,8 +374,6 @@ from .crowdbunker import (
    CrowdBunkerChannelIE,
)
from .crunchyroll import (
    CrunchyrollIE,
    CrunchyrollShowPlaylistIE,
    CrunchyrollBetaIE,
    CrunchyrollBetaShowIE,
)
@@ -440,6 +448,7 @@ from .dplay import (
    AnimalPlanetIE,
    TLCIE,
    MotorTrendIE,
    MotorTrendOnDemandIE,
    DiscoveryPlusIndiaIE,
    DiscoveryNetworksDeIE,
    DiscoveryPlusItalyIE,
@@ -461,11 +470,14 @@ from .duboku import (
)
from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .deuxm import (
    DeuxMIE,
    DeuxMNewsIE
)
from .digitalconcerthall import DigitalConcertHallIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE
from .dropbox import DropboxIE
from .dropout import (
    DropoutSeasonIE,
@@ -577,6 +589,7 @@ from .foxgay import FoxgayIE
from .foxnews import (
    FoxNewsIE,
    FoxNewsArticleIE,
    FoxNewsVideoIE,
)
from .foxsports import FoxSportsIE
from .fptplay import FptplayIE
@@ -627,6 +640,10 @@ from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .gedidigital import GediDigitalIE
from .generic import GenericIE
from .genius import (
    GeniusIE,
    GeniusLyricsIE,
)
from .gettr import (
    GettrIE,
    GettrStreamingIE,
@@ -683,6 +700,7 @@ from .hotstar import (
    HotStarIE,
    HotStarPrefixIE,
    HotStarPlaylistIE,
    HotStarSeasonIE,
    HotStarSeriesIE,
)
from .howcast import HowcastIE
@@ -696,7 +714,10 @@ from .hse import (
    HSEShowIE,
    HSEProductIE,
)
from .genericembeds import HTML5MediaEmbedIE
from .genericembeds import (
    HTML5MediaEmbedIE,
    QuotedHTMLIE,
)
from .huajiao import HuajiaoIE
from .huya import HuyaLiveIE
from .huffpost import HuffPostIE
@@ -786,6 +807,13 @@ from .jamendo import (
    JamendoIE,
    JamendoAlbumIE,
)
from .japandiet import (
    ShugiinItvLiveIE,
    ShugiinItvLiveRoomIE,
    ShugiinItvVodIE,
    SangiinInstructionIE,
    SangiinIE,
)
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .joj import JojIE
@@ -885,6 +913,7 @@ from .linkedin import (
)
from .linuxacademy import LinuxAcademyIE
from .liputan6 import Liputan6IE
from .listennotes import ListenNotesIE
from .litv import LiTVIE
from .livejournal import LiveJournalIE
from .livestream import (
@@ -998,6 +1027,7 @@ from .mlb import (
    MLBIE,
    MLBVideoIE,
    MLBTVIE,
    MLBArticleIE,
)
from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE
@@ -1180,6 +1210,7 @@ from .noodlemagazine import NoodleMagazineIE
from .noovo import NoovoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nosnl import NOSNLArticleIE
from .nova import (
    NovaEmbedIE,
    NovaIE,
@@ -1229,6 +1260,10 @@ from .nzherald import NZHeraldIE
from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oftv import (
    OfTVIE,
    OfTVPlaylistIE
)
from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE
from .on24 import On24IE
@@ -1343,6 +1378,7 @@ from .pluralsight import (
    PluralsightIE,
    PluralsightCourseIE,
)
from .podbayfm import PodbayFMIE, PodbayFMChannelIE
from .podchaser import PodchaserIE
from .podomatic import PodomaticIE
from .pokemon import (
@@ -1397,6 +1433,7 @@ from .prx import (
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qingting import QingTingIE
from .qqmusic import (
    QQMusicIE,
    QQMusicSingerIE,
@@ -1564,6 +1601,7 @@ from .samplefocus import SampleFocusIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
from .screen9 import Screen9IE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import (
@@ -1609,7 +1647,6 @@ from .skyit import (
    SkyItVideoIE,
    SkyItVideoLiveIE,
    SkyItIE,
    SkyItAcademyIE,
    SkyItArteIE,
    CieloTVItIE,
    TV8ItIE,
@@ -1729,6 +1766,7 @@ from .svt import (
    SVTPlayIE,
    SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
from .swrmediathek import SWRMediathekIE
from .syvdk import SYVDKIE
from .syfy import SyfyIE
@@ -1879,7 +1917,6 @@ from .tv2 import (
)
from .tv24ua import (
    TV24UAVideoIE,
    TV24UAGenericPassthroughIE
)
from .tv2dk import (
    TV2DKIE,
@@ -1930,7 +1967,8 @@ from .tvp import (
    TVPEmbedIE,
    TVPIE,
    TVPStreamIE,
    TVPWebsiteIE,
    TVPVODSeriesIE,
    TVPVODVideoIE,
)
from .tvplay import (
    TVPlayIE,
@@ -1961,6 +1999,7 @@ from .twitter import (
    TwitterIE,
    TwitterAmplifyIE,
    TwitterBroadcastIE,
    TwitterSpacesIE,
    TwitterShortenerIE,
)
from .udemy import (
@@ -1984,6 +2023,7 @@ from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
from .unscripted import UnscriptedNewsVideoIE
from .unsupported import KnownDRMIE, KnownPiracyIE
from .uol import UOLIE
from .uplynk import (
    UplynkIE,
@@ -2162,7 +2202,10 @@ from .wistia import (
    WistiaPlaylistIE,
    WistiaChannelIE,
)
from .wordpress import WordpressPlaylistEmbedIE
from .wordpress import (
    WordpressPlaylistEmbedIE,
    WordpressMiniAudioPlayerEmbedIE,
)
from .worldstarhiphop import WorldStarHipHopIE
from .wppilot import (
    WPPilotIE,
@@ -2223,6 +2266,7 @@ from .yandexvideo import (
from .yapfiles import YapFilesIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
from .youku import (
@@ -2285,6 +2329,7 @@ from .zee5 import (
    Zee5IE,
    Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
from .zhihu import ZhihuIE
from .zingmp3 import (
    ZingMp3IE,

@@ -161,7 +161,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        ac_idx = parse_qs(url).get('ac', [None])[-1]
        video_id = f'{video_id}{format_field(ac_idx, template="__%s")}'
        video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}'

        webpage = self._download_webpage(url, video_id)
        json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id)

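The call site tracks format_field's signature, where the template is the third positional argument after an optional field getter. A quick illustration (format_field is from yt_dlp.utils; outputs follow from that signature):

from yt_dlp.utils import format_field

format_field('2', None, '__%s')   # '__2'
format_field(None, None, '__%s')  # '' -- falls back to the default when the value is absent
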
@@ -28,30 +28,34 @@ from ..utils import (


class ADNIE(InfoExtractor):
    IE_DESC = 'Anime Digital Network'
    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'md5': '0319c99885ff5547565cacb4f3f9348d',
    IE_DESC = 'Animation Digital Network'
    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
        'md5': '1c9ef066ceb302c86f80c2b371615261',
        'info_dict': {
            'id': '7778',
            'id': '9841',
            'ext': 'mp4',
            'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
            'series': 'Blue Exorcist - Kyôto Saga',
            'duration': 1467,
            'release_date': '20170106',
            'title': 'Fruits Basket - Episode 1',
            'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
            'series': 'Fruits Basket',
            'duration': 1437,
            'release_date': '20190405',
            'comment_count': int,
            'average_rating': float,
            'season_number': 2,
            'episode': 'Début des hostilités',
            'season_number': 1,
            'episode': 'À ce soir !',
            'episode_number': 1,
        }
    }
        },
        'skip': 'Only available in region (FR, ...)',
    }, {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'only_matching': True,
    }]

    _NETRC_MACHINE = 'animedigitalnetwork'
    _BASE_URL = 'http://animedigitalnetwork.fr'
    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
    _NETRC_MACHINE = 'animationdigitalnetwork'
    _BASE = 'animationdigitalnetwork.fr'
    _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
    _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
    _HEADERS = {}
    _LOGIN_ERR_MESSAGE = 'Unable to log in'
@@ -75,11 +79,11 @@ class ADNIE(InfoExtractor):
        if subtitle_location:
            enc_subtitles = self._download_webpage(
                subtitle_location, video_id, 'Downloading subtitles data',
                fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
                fatal=False, headers={'Origin': 'https://' + self._BASE})
            if not enc_subtitles:
                return None

        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
            compat_b64decode(enc_subtitles[24:]),
            binascii.unhexlify(self._K + '7fac1178830cfe0c'),

40 yt_dlp/extractor/aeonco.py Normal file
@@ -0,0 +1,40 @@
from .common import InfoExtractor
from .vimeo import VimeoIE


class AeonCoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?aeon\.co/videos/(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'https://aeon.co/videos/raw-solar-storm-footage-is-the-punk-rock-antidote-to-sleek-james-webb-imagery',
        'md5': 'e5884d80552c9b6ea8d268a258753362',
        'info_dict': {
            'id': '1284717',
            'ext': 'mp4',
            'title': 'Brilliant Noise',
            'thumbnail': 'https://i.vimeocdn.com/video/21006315-1a1e49da8b07fd908384a982b4ba9ff0268c509a474576ebdf7b1392f4acae3b-d_960',
            'uploader': 'Semiconductor',
            'uploader_id': 'semiconductor',
            'uploader_url': 'https://vimeo.com/semiconductor',
            'duration': 348
        }
    }, {
        'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
        'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
        'info_dict': {
            'id': '728595228',
            'ext': 'mp4',
            'title': 'Wrought',
            'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
            'uploader': 'Biofilm Productions',
            'uploader_id': 'user140352216',
            'uploader_url': 'https://vimeo.com/user140352216',
            'duration': 1344
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
        vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
        return self.url_result(vimeo_url, VimeoIE)
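The extractor defers to Vimeo: _smuggle_referrer (a helper on VimeoIE) embeds the originating page into the URL so the Vimeo extractor later sends it as the Referer header, which player.vimeo.com requires for embed-only videos. A hedged sketch of the hand-off (the exact fragment format is an implementation detail of yt_dlp.utils.smuggle_url):

smuggled = VimeoIE._smuggle_referrer('https://player.vimeo.com/video/1284717', 'https://aeon.co')
# -> 'https://player.vimeo.com/video/1284717#__youtubedl_smuggle=...'
# VimeoIE unsmuggles this and adds 'Referer: https://aeon.co' to its requests
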
253 yt_dlp/extractor/agora.py Normal file
@@ -0,0 +1,253 @@
import functools
import uuid

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    int_or_none,
    month_by_name,
    parse_duration,
    try_call,
)


class WyborczaVideoIE(InfoExtractor):
    # this id is not an article id, it has to be extracted from the article
    _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
    IE_NAME = 'wyborcza:video'
    _TESTS = [{
        'url': 'wyborcza:video:26207634',
        'info_dict': {
            'id': '26207634',
            'ext': 'mp4',
            'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
            'description': ' ',
            'uploader': 'Dorota Roman',
            'duration': 2474,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/video/26207634',
        'only_matching': True,
    }, {
        'url': 'https://wyborcza.pl/api-video/26207634',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)

        formats = []
        base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
        for quality in ('standard', 'high'):
            if not meta['files'].get(quality):
                continue
            formats.append({
                'url': base_url + meta['files'][quality],
                'height': int_or_none(
                    self._search_regex(
                        r'p(\d+)[a-z]+\.mp4$', meta['files'][quality],
                        'mp4 video height', default=None)),
                'format_id': quality,
            })
        if meta['files'].get('dash'):
            formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id))

        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'title': meta.get('title'),
            'description': meta.get('lead'),
            'uploader': meta.get('signature'),
            'thumbnail': meta.get('imageUrl'),
            'duration': meta.get('duration'),
        }


class WyborczaPodcastIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:
            wyborcza\.pl/podcast(?:/0,172673\.html)?|
            wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
        )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
    '''
    _TESTS = [{
        'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
        'info_dict': {
            'id': '100720',
            'ext': 'mp3',
            'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
            'uploader': 'Michał Nogaś ',
            'upload_date': '20210117',
            'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
            'duration': 3684.0,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
        'info_dict': {
            'id': '100673',
            'ext': 'mp3',
            'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
            'uploader': 'Agnieszka Urazińska ',
            'upload_date': '20210115',
            'description': 'md5:c161dc035f8dbb60077011fc41274899',
            'duration': 1803.0,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/podcast',
        'info_dict': {
            'id': '334',
            'title': 'Gościnnie: Wyborcza, 8:10',
            'series': 'Gościnnie: Wyborcza, 8:10',
        },
        'playlist_mincount': 370,
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
        'info_dict': {
            'id': '395',
            'title': 'Gościnnie: Wysokie Obcasy',
            'series': 'Gościnnie: Wysokie Obcasy',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        podcast_id = self._match_id(url)

        if not podcast_id:  # playlist
            podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334'
            return self.url_result(TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id)

        meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id,
                                   query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None})

        day, month, year = self._search_regex(r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
                                              'upload date', group=(1, 2, 3), default=(None, None, None))
        return {
            'id': podcast_id,
            'url': meta['url'],
            'title': meta.get('title'),
            'description': meta.get('description'),
            'thumbnail': meta.get('imageUrl'),
            'duration': parse_duration(meta.get('duration')),
            'uploader': meta.get('author'),
            'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'),
        }


class TokFMPodcastIE(InfoExtractor):
    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:podcast'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
        'info_dict': {
            'id': '91275',
            'ext': 'aac',
            'title': 'md5:a9b15488009065556900169fb8061cce',
            'episode': 'md5:a9b15488009065556900169fb8061cce',
            'series': 'Analizy',
        },
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)

        # in case it breaks see this but it returns a lot of useless data
        # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
        metadata = self._download_json(
            f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
        if not metadata:
            raise ExtractorError('No such podcast', expected=True)
        metadata = metadata[0]

        formats = []
        for ext in ('aac', 'mp3'):
            url_data = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
                media_id, 'Downloading podcast %s URL' % ext)
            # prevents inserting the mp3 (default) multiple times
            if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
                formats.append({
                    'url': url_data['link_ssl'],
                    'ext': ext,
                    'vcodec': 'none',
                    'acodec': ext,
                })

        self._sort_formats(formats)
        return {
            'id': media_id,
            'formats': formats,
            'title': metadata.get('podcast_name'),
            'series': metadata.get('series_name'),
            'episode': metadata.get('podcast_name'),
        }


class TokFMAuditionIE(InfoExtractor):
    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:audition'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
        'info_dict': {
            'id': '218',
            'title': 'Analizy',
            'series': 'Analizy',
        },
        'playlist_count': 1635,
    }]

    _PAGE_SIZE = 30
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
    }

    @staticmethod
    def _create_url(id):
        return f'https://audycje.tokfm.pl/audycja/{id}'

    def _real_extract(self, url):
        audition_id = self._match_id(url)

        data = self._download_json(
            f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
            audition_id, 'Downloading audition metadata', headers=self._HEADERS)
        if not data:
            raise ExtractorError('No such audition', expected=True)
        data = data[0]

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, audition_id, data), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': audition_id,
            'title': data.get('series_name'),
            'series': data.get('series_name'),
            'entries': entries,
        }

    def _fetch_page(self, audition_id, data, page):
        for retry in self.RetryManager():
            podcast_page = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
                audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
            if not podcast_page:
                retry.error = ExtractorError('Agora returned empty page', expected=True)

        for podcast in podcast_page:
            yield {
                '_type': 'url_transparent',
                'url': podcast['podcast_sharing_url'],
                'ie_key': TokFMPodcastIE.ie_key(),
                'title': podcast.get('podcast_name'),
                'episode': podcast.get('podcast_name'),
                'description': podcast.get('podcast_description'),
                'timestamp': int_or_none(podcast.get('podcast_timestamp')),
                'series': data.get('series_name'),
            }
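The audition playlist above is built lazily: OnDemandPagedList only requests a page of _PAGE_SIZE entries when the consumer actually reaches it. A stripped-down sketch of the primitive (OnDemandPagedList is from yt_dlp.utils; fetch_page is a stub standing in for the Agora API call):

import functools
from yt_dlp.utils import OnDemandPagedList

def fetch_page(audition_id, page):
    # the real code requests ...getPodcasts?...&offset={page}; stub two pages
    if page >= 2:
        return []
    return [{'id': f'{audition_id}-{page}-{n}'} for n in range(30)]

entries = OnDemandPagedList(functools.partial(fetch_page, '218'), 30)
print(len(list(entries)))  # 60, fetched page by page on demand
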
@@ -11,7 +11,7 @@ from ..utils import (


class AmericasTestKitchenIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
        'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -19,15 +19,20 @@ class AmericasTestKitchenIE(InfoExtractor):
            'id': '5b400b9ee338f922cb06450c',
            'title': 'Japanese Suppers',
            'ext': 'mp4',
            'display_id': 'weeknight-japanese-suppers',
            'description': 'md5:64e606bfee910627efc4b5f050de92b3',
            'thumbnail': r're:^https?://',
            'timestamp': 1523318400,
            'upload_date': '20180410',
            'release_date': '20180410',
            'series': "America's Test Kitchen",
            'season_number': 18,
            'timestamp': 1523304000,
            'upload_date': '20180409',
            'release_date': '20180409',
            'series': 'America\'s Test Kitchen',
            'season': 'Season 18',
            'episode': 'Japanese Suppers',
            'season_number': 18,
            'episode_number': 15,
            'duration': 1376,
            'thumbnail': r're:^https?://',
            'average_rating': 0,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
@@ -40,15 +45,20 @@ class AmericasTestKitchenIE(InfoExtractor):
            'id': '5fbe8c61bda2010001c6763b',
            'title': 'Simple Chicken Dinner',
            'ext': 'mp4',
            'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
            'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
            'thumbnail': r're:^https?://',
            'timestamp': 1610755200,
            'upload_date': '20210116',
            'release_date': '20210116',
            'series': "America's Test Kitchen",
            'season_number': 21,
            'timestamp': 1610737200,
            'upload_date': '20210115',
            'release_date': '20210115',
            'series': 'America\'s Test Kitchen',
            'season': 'Season 21',
            'episode': 'Simple Chicken Dinner',
            'season_number': 21,
            'episode_number': 3,
            'duration': 1397,
            'thumbnail': r're:^https?://',
            'view_count': int,
            'average_rating': 0,
        },
        'params': {
            'skip_download': True,
@@ -57,10 +67,10 @@ class AmericasTestKitchenIE(InfoExtractor):
        'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
        'only_matching': True,
    }, {
        'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
        'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
        'only_matching': True,
    }, {
        'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
        'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
        'only_matching': True,
    }]

@@ -90,7 +100,7 @@ class AmericasTestKitchenIE(InfoExtractor):


class AmericasTestKitchenSeasonIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com(?P<show>/cookscountry)?/episodes/browse/season_(?P<id>\d+)'
    _TESTS = [{
        # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@@ -101,7 +111,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
        'playlist_count': 13,
    }, {
        # Cooks Country Season
        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
        'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
        'info_dict': {
            'id': 'season_12',
            'title': 'Season 12',
@@ -110,17 +120,17 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
    }]

    def _real_extract(self, url):
        show_name, season_number = self._match_valid_url(url).groups()
        show_path, season_number = self._match_valid_url(url).group('show', 'id')
        season_number = int(season_number)

        slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
        slug = 'cco' if show_path == '/cookscountry' else 'atk'

        season = 'Season %d' % season_number

        season_search = self._download_json(
            'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
            season, headers={
                'Origin': 'https://www.%s.com' % show_name,
                'Origin': 'https://www.americastestkitchen.com',
                'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                'X-Algolia-Application-Id': 'Y1FNZXUI30',
            }, query={
@@ -136,12 +146,12 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):

        def entries():
            for episode in (season_search.get('hits') or []):
                search_url = episode.get('search_url')
                search_url = episode.get('search_url')  # always formatted like '/episode/123-title-of-episode'
                if not search_url:
                    continue
                yield {
                    '_type': 'url',
                    'url': 'https://www.%s.com%s' % (show_name, search_url),
                    'url': f'https://www.americastestkitchen.com{show_path or ""}{search_url}',
                    'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
                    'title': episode.get('title'),
                    'description': episode.get('description'),

@@ -16,6 +16,7 @@ from ..utils import (
    get_element_by_id,
    int_or_none,
    join_nonempty,
    js_to_json,
    merge_dicts,
    mimetype2ext,
    orderedSet,
@@ -367,7 +368,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'channel_id': 'UCukCyHaD-bK3in_pKpfH9Eg',
            'duration': 32,
            'uploader_id': 'Zeurel',
            'uploader_url': 'http://www.youtube.com/user/Zeurel'
            'uploader_url': 'https://www.youtube.com/user/Zeurel',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
        }
    }, {
        # Internal link
@@ -382,7 +385,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'channel_id': 'UCHnyfMqiRRG1u-2MsSQLbXA',
            'duration': 771,
            'uploader_id': '1veritasium',
            'uploader_url': 'http://www.youtube.com/user/1veritasium'
            'uploader_url': 'https://www.youtube.com/user/1veritasium',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
        }
    }, {
        # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
@@ -396,7 +401,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'duration': 398,
            'description': 'md5:ff4de6a7980cb65d951c2f6966a4f2f3',
            'uploader_id': 'machinima',
            'uploader_url': 'http://www.youtube.com/user/machinima'
            'uploader_url': 'https://www.youtube.com/user/machinima',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'uploader': 'machinima'
        }
    }, {
        # FLV video. Video file URL does not provide itag information
@@ -410,7 +417,10 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'duration': 19,
            'description': 'md5:10436b12e07ac43ff8df65287a56efb4',
            'uploader_id': 'jawed',
            'uploader_url': 'http://www.youtube.com/user/jawed'
            'uploader_url': 'https://www.youtube.com/user/jawed',
            'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'uploader': 'jawed',
        }
    }, {
        'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
@@ -424,7 +434,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'duration': 204,
            'description': 'md5:f7535343b6eda34a314eff8b85444680',
            'uploader_id': 'itsmadeon',
            'uploader_url': 'http://www.youtube.com/user/itsmadeon'
            'uploader_url': 'https://www.youtube.com/user/itsmadeon',
            'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
        }
    }, {
        # First capture is of dead video, second is the oldest from CDX response.
@@ -435,10 +447,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'title': 'Fake Teen Doctor Strikes AGAIN! - Weekly Weird News',
            'upload_date': '20160218',
            'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
            'duration': 1236,
            'duration': 1235,
            'description': 'md5:21032bae736421e89c2edf36d1936947',
            'uploader_id': 'MachinimaETC',
            'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
            'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
            'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'uploader': 'ETC News',
        }
    }, {
        # First capture of dead video, capture date in link links to dead capture.
@@ -449,10 +464,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'title': 'WTF: Video Games Still Launch BROKEN?! - T.U.G.S.',
            'upload_date': '20160219',
            'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
            'duration': 798,
            'duration': 797,
            'description': 'md5:a1dbf12d9a3bd7cb4c5e33b27d77ffe7',
            'uploader_id': 'MachinimaETC',
            'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
            'uploader_url': 'https://www.youtube.com/user/MachinimaETC',
            'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'uploader': 'ETC News',
        },
        'expected_warnings': [
            r'unable to download capture webpage \(it may not be archived\)'
@@ -472,12 +490,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'title': 'It\'s Bootleg AirPods Time.',
            'upload_date': '20211021',
            'channel_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
            'channel_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
            'channel_url': 'https://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
            'duration': 810,
            'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'uploader': 'DankPods',
            'uploader_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
            'uploader_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug'
        }
    }, {
        # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
@@ -488,12 +505,135 @@ class YoutubeWebArchiveIE(InfoExtractor):
            'title': 'bitch lasagna',
            'upload_date': '20181005',
            'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
            'channel_url': 'http://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
            'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
            'duration': 135,
            'description': 'md5:2dbe4051feeff2dab5f41f82bb6d11d0',
            'uploader': 'PewDiePie',
            'uploader_id': 'PewDiePie',
            'uploader_url': 'http://www.youtube.com/user/PewDiePie'
            'uploader_url': 'https://www.youtube.com/user/PewDiePie',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
        }
    }, {
        # ~June 2010 Capture. swfconfig
        'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
        'info_dict': {
            'id': '8XeW5ilk-9Y',
            'ext': 'flv',
            'title': 'Story of Stuff, The Critique Part 4 of 4',
            'duration': 541,
            'description': 'md5:28157da06f2c5e94c97f7f3072509972',
            'uploader': 'HowTheWorldWorks',
            'uploader_id': 'HowTheWorldWorks',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
            'upload_date': '20090520',
        }
    }, {
        # Jan 2011: watch-video-date/eow-date surrounded by whitespace
        'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
        'info_dict': {
            'id': 'Q_yjX80U7Yc',
            'ext': 'flv',
            'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest',
            'uploader_id': 'claybutlermusic',
            'description': 'md5:4595264559e3d0a0ceb3f011f6334543',
            'upload_date': '20090803',
            'uploader': 'claybutlermusic',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'duration': 132,
            'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
        }
    }, {
        # ~May 2009 swfArgs. ytcfg is spread out over various vars
        'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
        'info_dict': {
            'id': 'c5uJgG05xUY',
            'ext': 'webm',
            'title': 'Story of Stuff, The Critique Part 1 of 4',
            'uploader_id': 'HowTheWorldWorks',
            'uploader': 'HowTheWorldWorks',
            'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
            'upload_date': '20090513',
            'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'duration': 754,
        }
    }, {
        # ~June 2012. Upload date is in another lang so cannot extract.
        'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
        'info_dict': {
            'id': 'xWTLLl-dQaA',
            'ext': 'mp4',
            'title': 'Black Nerd eHarmony Video Bio Parody (SPOOF)',
            'uploader_url': 'https://www.youtube.com/user/BlackNerdComedy',
            'description': 'md5:e25f0133aaf9e6793fb81c18021d193e',
            'uploader_id': 'BlackNerdComedy',
            'uploader': 'BlackNerdComedy',
            'duration': 182,
            'thumbnail': r're:https?://.*\.(jpg|webp)',
        }
    }, {
        # ~July 2013
        'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
        'info_dict': {
            'id': '9eO1aasHyTM',
            'ext': 'mp4',
            'title': 'Polar-oid',
            'description': 'Cameras and bears are dangerous!',
            'uploader_url': 'https://www.youtube.com/user/punkybird',
            'uploader_id': 'punkybird',
            'duration': 202,
            'channel_id': 'UC62R2cBezNBOqxSerfb1nMQ',
            'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
            'upload_date': '20060428',
            'uploader': 'punkybird',
        }
    }, {
        # April 2020: Player response in player config
        'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
        'info_dict': {
            'id': 'Cf7vS8jc7dY',
            'ext': 'mp4',
            'title': 'A Dramatic Pool Story (by Jamie Spicer-Lewis) - Game Grumps Animated',
            'duration': 64,
            'upload_date': '20200408',
            'uploader_id': 'GameGrumps',
            'uploader': 'GameGrumps',
            'channel_url': 'https://www.youtube.com/channel/UC9CuvdOVfMPvKCiwdGKL3cQ',
            'channel_id': 'UC9CuvdOVfMPvKCiwdGKL3cQ',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
            'uploader_url': 'https://www.youtube.com/user/GameGrumps',
        }
    }, {
        # watch7-user-header with yt-user-info
        'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
        'info_dict': {
            'id': 'kbh4T_b4Ixw',
            'ext': 'mp4',
            'title': 'Shovel Knight OST - Strike the Earth! Plains of Passage 16 bit SNES style remake / remix',
            'channel_url': 'https://www.youtube.com/channel/UCnTaGvsHmMy792DWeT6HbGA',
            'uploader': 'Nelward music',
            'duration': 213,
            'description': 'md5:804b4a9ce37b050a5fefdbb23aeba54d',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'upload_date': '20150503',
            'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
        }
    }, {
        # April 2012
        'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
        'info_dict': {
            'id': 'SOm7mPoPskU',
            'ext': 'mp4',
            'title': 'Boyfriend - Justin Bieber Parody',
            'uploader_url': 'https://www.youtube.com/user/thecomputernerd01',
            'uploader': 'thecomputernerd01',
            'thumbnail': r're:https?://.*\.(jpg|webp)',
            'description': 'md5:dd7fa635519c2a5b4d566beaecad7491',
            'duration': 200,
            'upload_date': '20120407',
            'uploader_id': 'thecomputernerd01',
        }
    }, {
        'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
@@ -574,6 +714,27 @@ class YoutubeWebArchiveIE(InfoExtractor):
        initial_data = self._search_json(
            self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})

        ytcfg = {}
        for j in re.findall(r'yt\.setConfig\(\s*(?P<json>{\s*(?s:.+?)\s*})\s*\);', webpage):  # ~June 2010
            ytcfg.update(self._parse_json(j, video_id, fatal=False, ignore_extra=True, transform_source=js_to_json, errnote='') or {})

        # XXX: this also may contain a 'ptchn' key
        player_config = (
            self._search_json(
                r'(?:yt\.playerConfig|ytplayer\.config|swfConfig)\s*=',
                webpage, 'player config', video_id, default=None)
            or ytcfg.get('PLAYER_CONFIG') or {})

        # XXX: this may also contain a 'creator' key.
        swf_args = self._search_json(r'swfArgs\s*=', webpage, 'swf config', video_id, default={})
        if swf_args and not traverse_obj(player_config, ('args',)):
            player_config['args'] = swf_args

        if not player_response:
            # April 2020
            player_response = self._parse_json(
                traverse_obj(player_config, ('args', 'player_response')) or '{}', video_id, fatal=False)

        initial_data_video = traverse_obj(
            initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
            expected_type=dict, get_all=False, default={})
@@ -588,21 +749,64 @@ class YoutubeWebArchiveIE(InfoExtractor):
            video_details.get('title')
            or YoutubeBaseInfoExtractor._get_text(microformats, 'title')
            or YoutubeBaseInfoExtractor._get_text(initial_data_video, 'title')
            or traverse_obj(player_config, ('args', 'title'))
            or self._extract_webpage_title(webpage)
            or search_meta(['og:title', 'twitter:title', 'title']))

        def id_from_url(url, type_):
            return self._search_regex(
                rf'(?:{type_})/([^/#&?]+)', url or '', f'{type_} id', default=None)

        # XXX: would the get_elements_by_... functions be better suited here?
        _CHANNEL_URL_HREF_RE = r'href="[^"]*(?P<url>https?://www\.youtube\.com/(?:user|channel)/[^"]+)"'
        uploader_or_channel_url = self._search_regex(
            [fr'<(?:link\s*itemprop=\"url\"|a\s*id=\"watch-username\").*?\b{_CHANNEL_URL_HREF_RE}>',  # @fd05024
             fr'<div\s*id=\"(?:watch-channel-stats|watch-headline-user-info)\"[^>]*>\s*<a[^>]*\b{_CHANNEL_URL_HREF_RE}'],  # ~ May 2009, ~June 2012
            webpage, 'uploader or channel url', default=None)

        owner_profile_url = url_or_none(microformats.get('ownerProfileUrl'))  # @a6211d2

        # Uploader refers to the /user/ id ONLY
        uploader_id = (
            id_from_url(owner_profile_url, 'user')
            or id_from_url(uploader_or_channel_url, 'user')
            or ytcfg.get('VIDEO_USERNAME'))
        uploader_url = f'https://www.youtube.com/user/{uploader_id}' if uploader_id else None

        # XXX: do we want to differentiate uploader and channel?
        uploader = (
            self._search_regex(
                [r'<a\s*id="watch-username"[^>]*>\s*<strong>([^<]+)</strong>',  # June 2010
                 r'var\s*watchUsername\s*=\s*\'(.+?)\';',  # ~May 2009
                 r'<div\s*\bid=\"watch-channel-stats"[^>]*>\s*<a[^>]*>\s*(.+?)\s*</a',  # ~May 2009
                 r'<a\s*id="watch-userbanner"[^>]*title="\s*(.+?)\s*"'],  # ~June 2012
                webpage, 'uploader', default=None)
            or self._html_search_regex(
                [r'(?s)<div\s*class="yt-user-info".*?<a[^>]*[^>]*>\s*(.*?)\s*</a',  # March 2016
                 r'(?s)<a[^>]*yt-user-name[^>]*>\s*(.*?)\s*</a'],  # july 2013
                get_element_by_id('watch7-user-header', webpage), 'uploader', default=None)
            or self._html_search_regex(
                r'<button\s*href="/user/[^>]*>\s*<span[^>]*>\s*(.+?)\s*<',  # April 2012
                get_element_by_id('watch-headline-user-info', webpage), 'uploader', default=None)
            or traverse_obj(player_config, ('args', 'creator'))
            or video_details.get('author'))

        channel_id = str_or_none(
            video_details.get('channelId')
            or microformats.get('externalChannelId')
            or search_meta('channelId')
            or self._search_regex(
                r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',  # @b45a9e6
                webpage, 'channel id', default=None, group='id'))
        channel_url = f'http://www.youtube.com/channel/{channel_id}' if channel_id else None
                webpage, 'channel id', default=None, group='id')
            or id_from_url(owner_profile_url, 'channel')
            or id_from_url(uploader_or_channel_url, 'channel')
            or traverse_obj(player_config, ('args', 'ucid')))

        channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
        duration = int_or_none(
            video_details.get('lengthSeconds')
            or microformats.get('lengthSeconds')
            or traverse_obj(player_config, ('args', ('length_seconds', 'l')), get_all=False)
            or parse_duration(search_meta('duration')))
        description = (
            video_details.get('shortDescription')
@@ -610,26 +814,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
            or clean_html(get_element_by_id('eow-description', webpage))  # @9e6dd23
            or search_meta(['description', 'og:description', 'twitter:description']))

        uploader = video_details.get('author')

        # Uploader ID and URL
        uploader_mobj = re.search(
            r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',  # @fd05024
            webpage)
        if uploader_mobj is not None:
            uploader_id, uploader_url = uploader_mobj.group('uploader_id'), uploader_mobj.group('uploader_url')
        else:
            # @a6211d2
            uploader_url = url_or_none(microformats.get('ownerProfileUrl'))
            uploader_id = self._search_regex(
                r'(?:user|channel)/([^/]+)', uploader_url or '', 'uploader id', default=None)

        upload_date = unified_strdate(
            dict_get(microformats, ('uploadDate', 'publishDate'))
            or search_meta(['uploadDate', 'datePublished'])
            or self._search_regex(
                [r'(?s)id="eow-date.*?>(.*?)</span>',
                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],  # @7998520
                [r'(?s)id="eow-date.*?>\s*(.*?)\s*</span>',
                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']',  # @7998520
                 r'class\s*=\s*"(?:watch-video-date|watch-video-added post-date)"[^>]*>\s*([^<]+?)\s*<'],  # ~June 2010, ~Jan 2009 (respectively)
                webpage, 'upload date', default=None))

        return {
@@ -698,18 +889,22 @@ class YoutubeWebArchiveIE(InfoExtractor):
        url_date = url_date or url_date_2

        urlh = None
        try:
            urlh = self._request_webpage(
                HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
                video_id, note='Fetching archived video file url', expected_status=True)
        except ExtractorError as e:
            # HTTP Error 404 is expected if the video is not saved.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                self.raise_no_formats(
                    'The requested video is not archived, indexed, or there is an issue with web.archive.org',
                    expected=True)
            else:
                raise
        retry_manager = self.RetryManager(fatal=False)
        for retry in retry_manager:
            try:
                urlh = self._request_webpage(
                    HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
                    video_id, note='Fetching archived video file url', expected_status=True)
            except ExtractorError as e:
                # HTTP Error 404 is expected if the video is not saved.
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                    self.raise_no_formats(
                        'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
                else:
                    retry.error = e

        if retry_manager.error:
            self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id)

        capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
        self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', '))

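The last hunk swaps the one-shot try/except for yt-dlp's RetryManager idiom: the loop body records the failure on retry.error instead of raising, and the manager decides whether to run the block again. A standalone sketch (RetryManager is from yt_dlp.utils; the three-argument callback mirrors how the manager reports errors, and the flaky fetch is a stub):

from yt_dlp.utils import RetryManager

attempts = []

def fetch():
    attempts.append(1)
    if len(attempts) < 3:
        raise OSError('transient failure')
    return 'ok'

result = None
for retry in RetryManager(3, lambda err, count, retries: print(f'retry {count}/{retries}: {err}')):
    try:
        result = fetch()
    except OSError as e:
        retry.error = e  # setting .error makes the manager re-run the block

print(result)  # 'ok' after two retried failures
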
@@ -303,9 +303,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video)

        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
        title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
        title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None

        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
                                          description=self._og_search_description(webpage, default=None))

@@ -591,7 +591,12 @@ class BBCCoUkIE(InfoExtractor):
class BBCIE(BBCCoUkIE):
    IE_NAME = 'bbc'
    IE_DESC = 'BBC'
    _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:
            bbc\.(?:com|co\.uk)|
            bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
            bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
        )/(?:[^/]+/)+(?P<id>[^/#?]+)'''

    _MEDIA_SETS = [
        'pc',
@@ -841,6 +846,12 @@ class BBCIE(BBCCoUkIE):
            'upload_date': '20190604',
            'categories': ['Psychology'],
        },
    }, {  # onion routes
        'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
        'only_matching': True,
    }, {
        'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
        'only_matching': True,
    }]

    @classmethod
@@ -898,12 +909,8 @@ class BBCIE(BBCCoUkIE):
        json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
        timestamp = json_ld_info.get('timestamp')

        playlist_title = json_ld_info.get('title')
        if not playlist_title:
            playlist_title = (self._og_search_title(webpage, default=None)
                              or self._html_extract_title(webpage, 'playlist title', default=None))
            if playlist_title:
                playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
        playlist_title = json_ld_info.get('title') or re.sub(
            r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None

        playlist_description = json_ld_info.get(
            'description') or self._og_search_description(webpage, default=None)

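A quick self-check that the extended pattern really accepts the Tor mirrors (plain re; the pattern is copied from the hunk above):

import re

_VALID_URL = r'''(?x)
    https?://(?:www\.)?(?:
        bbc\.(?:com|co\.uk)|
        bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
        bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
    )/(?:[^/]+/)+(?P<id>[^/#?]+)'''

m = re.match(_VALID_URL, 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576')
assert m and m.group('id') == 'world-europe-63208576'
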
@@ -1,426 +1,114 @@
import base64
import hashlib
import itertools
import functools
import itertools
import math
import re
import urllib
import urllib.error
import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_urlparse,
    compat_urllib_parse_urlparse
)
from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    InAdvancePagedList,
    OnDemandPagedList,
    filter_dict,
    float_or_none,
    format_field,
    int_or_none,
    make_archive_id,
    mimetype2ext,
    parse_count,
    parse_iso8601,
    parse_qs,
    qualities,
    smuggle_url,
    srt_subtitles_timecode,
    str_or_none,
    strip_jsonp,
    traverse_obj,
    unified_timestamp,
    unsmuggle_url,
    urlencode_postdata,
    url_or_none,
    urlencode_postdata,
)

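The rewritten bilibili extractor below leans on `traverse_obj` for all JSON navigation: follow a path of keys/indices, branch over every element where `...` appears, and return None instead of raising on a missing path. A small self-contained example of those semantics as used here:

from yt_dlp.utils import traverse_obj

data = {'dash': {'audio': [{'id': 1}, {'id': 2}], 'flac': None}}
assert traverse_obj(data, ('dash', 'audio', ..., 'id')) == [1, 2]
assert traverse_obj(data, ('dash', 'flac', 'audio')) is None  # missing path, no KeyError
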
class BiliBiliIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:(?:www|bangumi)\.)?
                        bilibili\.(?:tv|com)/
                        (?:
                            (?:
                                video/[aA][vV]|
                                anime/(?P<anime_id>\d+)/play\#
                            )(?P<id>\d+)|
                            (s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
                        )
                        (?:/?\?p=(?P<page>\d+))?
                    '''

    _TESTS = [{
        'url': 'http://www.bilibili.com/video/av1074402/',
        'md5': '7ac275ec84a99a6552c5d229659a0fe1',
        'info_dict': {
            'id': '1074402_part1',
            'ext': 'mp4',
            'title': '【金坷垃】金泡沫',
            'uploader_id': '156160',
            'uploader': '菊子桑',
            'upload_date': '20140420',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'timestamp': 1398012678,
            'tags': ['顶上去报复社会', '该来的总会来的', '金克拉是检验歌曲的唯一标准', '坷垃教主', '金坷垃', '邓紫棋', '治愈系坷垃'],
            'bv_id': 'BV11x411K7CN',
            'cid': '1554319',
            'thumbnail': 'http://i2.hdslb.com/bfs/archive/c79a8cf0347cd7a897c53a2f756e96aead128e8c.jpg',
            'duration': 308.36,
        },
    }, {
        # Tested in BiliBiliBangumiIE
        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
        'only_matching': True,
    }, {
        # bilibili.tv
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'only_matching': True,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643_part1',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': '8903802_part1',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'timestamp': 1488382634,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': 'http://i2.hdslb.com/bfs/archive/49267ce20bc246be6304bf369a3ded0256854c23.jpg',
            'cid': '14694589',
            'duration': 554.117,
            'bv_id': 'BV13x41117TL',
            'tags': ['人文', '英语', '文化', '公开课', '阿滴英文'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # new BV video id format
        'url': 'https://www.bilibili.com/video/BV1JE411F741',
        'only_matching': True,
    }, {
        # Anthology
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 17,
    }, {
        # Correct matching of single and double quotes in title
        'url': 'https://www.bilibili.com/video/BV1NY411E7Rx/',
        'info_dict': {
            'id': '255513412_part1',
            'ext': 'mp4',
            'title': 'Vid"eo" Te\'st',
            'cid': '570602418',
            'thumbnail': 'http://i2.hdslb.com/bfs/archive/0c0de5a90b6d5b991b8dcc6cde0afbf71d564791.jpg',
            'upload_date': '20220408',
            'timestamp': 1649436552,
            'description': 'Vid"eo" Te\'st',
            'uploader_id': '1630758804',
            'bv_id': 'BV1NY411E7Rx',
            'duration': 60.394,
            'uploader': 'bili_31244483705',
            'tags': ['VLOG'],
        },
        'params': {
            'skip_download': True,
        },
    }]

    _APP_KEY = 'iVGUTjsxvpLeuDCf'
    _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'

    def _report_error(self, result):
        if 'message' in result:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
        elif 'code' in result:
            raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
        else:
            raise ExtractorError('Can\'t extract Bangumi episode ID')

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = self._match_valid_url(url)
        video_id = mobj.group('id_bv') or mobj.group('id')

        av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
        video_id = av_id

        info = {}
        anime_id = mobj.group('anime_id')
        page_id = mobj.group('page')
        webpage = self._download_webpage(url, video_id)

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        # If the video has no page argument, check to see if it's an anthology
        if page_id is None:
            if not self.get_param('noplaylist'):
                r = self._extract_anthology_entries(bv_id, video_id, webpage)
                if r is not None:
                    self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
                    return r
            else:
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        if 'anime/' not in url:
            cid = self._search_regex(
                r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
                default=None
            ) or self._search_regex(
                r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
                default=None
            ) or compat_parse_qs(self._search_regex(
                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                webpage, 'player parameters'))['cid'][0]
        else:
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': url
            }
            headers.update(self.geo_verification_headers())

            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
                headers=headers)
            if 'result' not in js:
                self._report_error(js)
            cid = js['result']['cid']

        headers = {
            'Accept': 'application/json',
            'Referer': url
class BilibiliBaseIE(InfoExtractor):
    def extract_formats(self, play_info):
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }
        headers.update(self.geo_verification_headers())

        video_info = self._parse_json(
            self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None) or '{}',
            video_id, fatal=False)
        video_info = video_info.get('data') or {}

        durl = traverse_obj(video_info, ('dash', 'video'))
        audios = traverse_obj(video_info, ('dash', 'audio')) or []
        flac_audio = traverse_obj(video_info, ('dash', 'flac', 'audio'))
        audios = traverse_obj(play_info, ('dash', 'audio', ...))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        entries = []
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': audio.get('codecs'),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size'))
        } for audio in audios]

        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
        for num, rendition in enumerate(RENDITIONS, start=1):
            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
            if not video_info:
                video_info = self._download_json(
                    'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                    video_id, note='Downloading video info page',
                    headers=headers, fatal=num == len(RENDITIONS))
            if not video_info:
                continue
        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': 'none' if audios else None,
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

            if not durl and 'durl' not in video_info:
                if num < len(RENDITIONS):
                    continue
                self._report_error(video_info)

            formats = []
            for idx, durl in enumerate(durl or video_info['durl']):
                formats.append({
                    'url': durl.get('baseUrl') or durl.get('base_url') or durl.get('url'),
                    'ext': mimetype2ext(durl.get('mimeType') or durl.get('mime_type')),
                    'fps': int_or_none(durl.get('frameRate') or durl.get('frame_rate')),
                    'width': int_or_none(durl.get('width')),
                    'height': int_or_none(durl.get('height')),
                    'vcodec': durl.get('codecs'),
                    'acodec': 'none' if audios else None,
                    'tbr': float_or_none(durl.get('bandwidth'), scale=1000),
                    'filesize': int_or_none(durl.get('size')),
                })
                for backup_url in traverse_obj(durl, 'backup_url', expected_type=list) or []:
                    formats.append({
                        'url': backup_url,
                        'quality': -2 if 'hd.mp4' in backup_url else -3,
                    })

            for audio in audios:
                formats.append({
                    'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'),
                    'ext': mimetype2ext(audio.get('mimeType') or audio.get('mime_type')),
                    'fps': int_or_none(audio.get('frameRate') or audio.get('frame_rate')),
                    'width': int_or_none(audio.get('width')),
                    'height': int_or_none(audio.get('height')),
                    'acodec': audio.get('codecs'),
                    'vcodec': 'none',
                    'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
                    'filesize': int_or_none(audio.get('size'))
                })
                for backup_url in traverse_obj(audio, 'backup_url', expected_type=list) or []:
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'quality': -3,
                    })

            info.update({
                'id': video_id,
                'duration': float_or_none(durl.get('length'), 1000),
                'formats': formats,
                'http_headers': {
                    'Referer': url,
                },
            })
            break
        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
                           'you have to login or become premium member to download them')

        self._sort_formats(formats)
        return formats

        title = self._html_search_regex((
            r'<h1[^>]+title=(["])(?P<content>[^"]+)',
            r'<h1[^>]+title=([\'])(?P<content>[^\']+)',
            r'(?s)<h1[^>]*>(?P<content>.+?)</h1>',
            self._meta_regex('title')
        ), webpage, 'title', group='content', fatal=False)
    def json2srt(self, json_data):
        srt_data = ''
        for idx, line in enumerate(json_data.get('body') or []):
            srt_data += (f'{idx + 1}\n'
                         f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
                         f'{line["content"]}\n\n')
        return srt_data

        # Get part title for anthologies
        if page_id is not None:
            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video.
            part_info = traverse_obj(self._download_json(
                f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
                video_id, note='Extracting videos in anthology'), 'data', expected_type=list)
            title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title

        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
            default=None) or self._html_search_meta(
            'uploadDate', webpage, 'timestamp', default=None))
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info.update({
            'id': f'{video_id}_part{page_id or 1}',
            'cid': cid,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        })

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
            webpage)
        if uploader_mobj:
            info.update({
                'uploader': uploader_mobj.group('name').strip(),
                'uploader_id': uploader_mobj.group('id'),
            })

        if not info.get('uploader'):
            info['uploader'] = self._html_search_meta(
                'author', webpage, 'uploader', default=None)

        top_level_info = {
            'tags': traverse_obj(self._download_json(
                f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}',
                video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
        }

        info['subtitles'] = {
    def _get_subtitles(self, video_id, initial_state, cid):
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        r'''
        # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
        # See https://github.com/animelover1984/youtube-dl
        for s in traverse_obj(initial_state, ('videoData', 'subtitle', 'list')) or []:
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

        raw_danmaku = self._download_webpage(
            f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments')
        danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
        entries[0]['subtitles'] = {
            'danmaku': [{
                'ext': 'ass',
                'data': danmaku
            }]
        }
        '''
    def _get_chapters(self, aid, cid):
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

        top_level_info['__post_extractor'] = self.extract_comments(video_id)

        for entry in entries:
            entry.update(info)

        if len(entries) == 1:
            entries[0].update(top_level_info)
            return entries[0]

        for idx, entry in enumerate(entries):
            entry['id'] = '%s_part%d' % (video_id, (idx + 1))

        return {
            'id': str(video_id),
            'bv_id': bv_id,
            'title': title,
            'description': description,
            **info, **top_level_info
        }

    def _extract_anthology_entries(self, bv_id, video_id, webpage):
        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>',
             r'<title>(?P<title>.+?)</title>'), webpage, 'title',
            group='title')
        json_data = self._download_json(
            f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
            video_id, note='Extracting videos in anthology')

        if json_data['data']:
            return self.playlist_from_matches(
                json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(),
                getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))

    def _get_video_id_set(self, id, is_bv):
        query = {'bvid': id} if is_bv else {'aid': id}
        response = self._download_json(
            "http://api.bilibili.cn/x/web-interface/view",
            id, query=query,
            note='Grabbing original ID via API')

        if response['code'] == -400:
            raise ExtractorError('Video ID does not exist', expected=True, video_id=id)
        elif response['code'] != 0:
            raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})',
                                 expected=True, video_id=id)
        return response['data']['aid'], response['data']['bvid']

    def _get_comments(self, video_id, commentPageNumber=0):
    def _get_comments(self, aid):
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    video_id, note=f'Extracting comments from page {idx}', fatal=False),
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
@@ -436,75 +124,308 @@ class BiliBiliIE(InfoExtractor):
                'timestamp': reply.get('ctime'),
                'parent': reply.get('parent') or 'root',
            }
            for children in map(self._get_all_children, reply.get('replies') or []):
            for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
                yield from children


class BiliBiliBangumiIE(InfoExtractor):
    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'
class BiliBiliIE(BilibiliBaseIE):
    _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        # old av URL version
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:11',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:11',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫Tech',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }]

    @classmethod
    def suitable(cls, url):
        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']

        video_data = initial_state['videoData']
        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology'),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            title += f' p{part_id:02d} {traverse_obj(page_list_json, ((part_id or 1) - 1, "part")) or ""}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        return {
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            'formats': self.extract_formats(play_info),
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'description': traverse_obj(initial_state, ('videoData', 'desc')),
            'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
            'uploader': traverse_obj(initial_state, ('upData', 'name')),
            'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
            'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
            'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
            'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
            'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
            'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, initial_state, cid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }

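The `_old_archive_ids` entry above is what keeps existing --download-archive files working across the av→BV ID change. `make_archive_id` just combines the extractor key with the legacy ID; for example (behaviour as I understand the helper imported at the top of this file):

from yt_dlp.utils import make_archive_id

# produces the '<ie_key> <id>' string stored in download-archive files
assert make_archive_id('BiliBili', '1074402_part1') == 'bilibili 1074402_part1'
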
class BiliBiliBangumiIE(BilibiliBaseIE):
    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss897',
        'info_dict': {
            'id': 'ss897',
            'ext': 'mp4',
            'series': '神的记事本',
            'season': '神的记事本',
            'season_id': 897,
            'season_number': 1,
            'episode': '你与旅行包',
            'episode_number': 2,
            'title': '神的记事本:第2话 你与旅行包',
            'duration': 1428.487,
            'timestamp': 1310809380,
            'upload_date': '20110716',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep508406',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        bangumi_id = self._match_id(url)
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']
        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
                or '正在观看预览,大会员免费看全片' in webpage):
            self.raise_login_required('This video is for premium members only')

        entries = [{
            '_type': 'url_transparent',
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]
        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
        formats = self.extract_formats(play_info)
        if (not formats and '成为大会员抢先看' in webpage
                and play_info.get('durl') and not play_info.get('dash')):
            self.raise_login_required('This video is for premium members only')

        entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))
        season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
        season_number = season_id and next((
            idx + 1 for idx, e in enumerate(
                traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
            if e.get('season_id') == season_id
        ), None)

        return {
            'id': video_id,
            'formats': formats,
            'title': traverse_obj(initial_state, 'h1Title'),
            'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
            'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
            'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
            'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
            'season_id': season_id,
            'season_number': season_number,
            'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
            'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(
                video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
            '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
            'http_headers': {'Referer': url, **self.geo_verification_headers()},
        }


class BiliBiliBangumiMediaIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
        },
        'playlist_mincount': 25,
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        episode_list = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', media_id,
            query={'season_id': initial_state['mediaInfo']['season_id']},
            note='Downloading season info')['result']['main_section']['episodes']

        return self.playlist_result((
            self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
            for entry in episode_list), media_id)


class BilibiliSpaceBaseIE(InfoExtractor):
@@ -700,8 +621,7 @@ class BilibiliCategoryIE(InfoExtractor):
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        u = compat_urllib_parse_urlparse(url)
        category, subcategory = u.path.split('/')[2:4]
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = '%s: %s' % (category, subcategory)

        return self.playlist_result(self._entries(category, subcategory, query), query, query)

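The change above drops the compat shim in favour of the stdlib call; the path-splitting trick it relies on works like this (URL is an illustrative bilibili category page):

import urllib.parse

path = urllib.parse.urlparse('https://www.bilibili.com/v/kichiku/mad/?spm_id_from=333.5').path
category, subcategory = path.split('/')[2:4]
assert (category, subcategory) == ('kichiku', 'mad')
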
@@ -1,11 +1,20 @@
import itertools
import functools
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    OnDemandPagedList,
    clean_html,
    get_element_by_class,
    get_element_by_id,
    get_elements_html_by_class,
    int_or_none,
    orderedSet,
    parse_count,
    parse_duration,
    traverse_obj,
    unified_strdate,
    urlencode_postdata,
)
@@ -18,7 +27,7 @@ class BitChuteIE(InfoExtractor):
        'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
        'md5': '7e427d7ed7af5a75b5855705ec750e2b',
        'info_dict': {
            'id': 'szoMrox2JEI',
            'id': 'UGlrF9o9b-Q',
            'ext': 'mp4',
            'title': 'This is the first video on #BitChute !',
            'description': 'md5:a0337e7b1fe39e32336974af8173a034',
@@ -26,6 +35,31 @@ class BitChuteIE(InfoExtractor):
            'uploader': 'BitChute',
            'upload_date': '20170103',
        },
    }, {
        # video not downloadable in browser, but we can recover it
        'url': 'https://www.bitchute.com/video/2s6B3nZjAk7R/',
        'md5': '05c12397d5354bf24494885b08d24ed1',
        'info_dict': {
            'id': '2s6B3nZjAk7R',
            'ext': 'mp4',
            'filesize': 71537926,
            'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
            'description': 'md5:228ee93bd840a24938f536aeac9cf749',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'BitChute',
            'upload_date': '20181113',
        },
        'params': {'check_formats': None},
    }, {
        # restricted video
        'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
        'info_dict': {
            'id': 'WEnQU7XGcTdl',
            'ext': 'mp4',
            'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
        },
        'params': {'skip_download': True},
        'skip': 'Georestricted in DE',
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
@@ -33,118 +67,168 @@ class BitChuteIE(InfoExtractor):
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
        'Referer': 'https://www.bitchute.com/',
    }

    def _check_format(self, video_url, video_id):
        urls = orderedSet(
            re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
            for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
        for url in urls:
            try:
                response = self._request_webpage(
                    HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
            except ExtractorError as e:
                self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
                continue
            return {
                'url': url,
                'filesize': int_or_none(response.headers.get('Content-Length'))
            }

    def _raise_if_restricted(self, webpage):
        page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
        if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
            reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
            self.raise_geo_restricted(reason)

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })
            f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)

        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'description', webpage, 'title',
            default=None) or self._og_search_description(webpage)
        self._raise_if_restricted(webpage)
        publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
        entries = self._parse_html5_media_entries(url, webpage, video_id)

        format_urls = []
        for mobj in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
            format_urls.append(mobj.group('url'))
        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))

        formats = [
            {'url': format_url}
            for format_url in orderedSet(format_urls)]
        formats = []
        for format_ in traverse_obj(entries, (0, 'formats', ...)):
            if self.get_param('check_formats') is not False:
                format_.update(self._check_format(format_.pop('url'), video_id) or {})
                if 'url' not in format_:
                    continue
            formats.append(format_)

        if not formats:
            entries = self._parse_html5_media_entries(
                url, webpage, video_id)
            if not entries:
                error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
                if error == 'Video Unavailable':
                    raise GeoRestrictedError(error)
                raise ExtractorError(error, expected=True)
            formats = entries[0]['formats']

        self._check_formats(formats, video_id)
        if not formats:
            raise self.raise_no_formats('Video is unavailable', expected=True, video_id=video_id)
            self.raise_no_formats(
                'Video is unavailable. Please make sure this video is playable in the browser '
                'before reporting this issue.', expected=True, video_id=video_id)
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail')
        uploader = self._html_search_regex(
            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
            webpage, 'uploader', fatal=False)

        upload_date = unified_strdate(self._search_regex(
            r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
            webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader': clean_html(get_element_by_class('owner', webpage)),
            'upload_date': unified_strdate(self._search_regex(
                r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
            'formats': formats,
        }

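The `_check_format` helper above probes alternate seed hosts when BitChute's assigned server is dead. The substitution uses a lookahead so only the subdomain is swapped; a standalone illustration (host list as in the diff, URL path invented for the example):

import re

video_url = 'https://seed126.bitchute.com/hU2elaB5u3kB/2s6B3nZjAk7R.mp4'
candidates = [
    re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
    for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153')
]
# the first candidate keeps the original host; the rest try known-good seeds
assert candidates[0] == video_url
assert candidates[1] == 'https://seed150.bitchute.com/hU2elaB5u3kB/2s6B3nZjAk7R.mp4'
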
class BitChuteChannelIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/channel/bitchute/',
        'info_dict': {
            'id': 'victoriaxrave',
            'id': 'bitchute',
            'title': 'BitChute',
            'description': 'md5:5329fb3866125afa9446835594a9b138',
        },
    }
        'playlist': [
            {
                'md5': '7e427d7ed7af5a75b5855705ec750e2b',
                'info_dict': {
                    'id': 'UGlrF9o9b-Q',
                    'ext': 'mp4',
                    'filesize': None,
                    'title': 'This is the first video on #BitChute !',
                    'description': 'md5:a0337e7b1fe39e32336974af8173a034',
                    'thumbnail': r're:^https?://.*\.jpg$',
                    'uploader': 'BitChute',
                    'upload_date': '20170103',
                    'duration': 16,
                    'view_count': int,
                },
            }
        ],
        'params': {
            'skip_download': True,
            'playlist_items': '-1',
        },
    }, {
        'url': 'https://www.bitchute.com/playlist/wV9Imujxasw9/',
        'playlist_mincount': 20,
        'info_dict': {
            'id': 'wV9Imujxasw9',
            'title': 'Bruce MacDonald and "The Light of Darkness"',
            'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
        }
    }]

    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
    PAGE_SIZE = 25
    HTML_CLASS_NAMES = {
        'channel': {
            'container': 'channel-videos-container',
            'title': 'channel-videos-title',
            'description': 'channel-videos-text',
        },
        'playlist': {
            'container': 'playlist-video',
            'title': 'title',
            'description': 'description',
        }

    def _entries(self, channel_id):
        channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        offset = 0
        for page_num in itertools.count(1):
            data = self._download_json(
                '%sextend/' % channel_url, channel_id,
                'Downloading channel page %d' % page_num,
                data=urlencode_postdata({
                    'csrfmiddlewaretoken': self._TOKEN,
                    'name': '',
                    'offset': offset,
                }), headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                    'Referer': channel_url,
                    'X-Requested-With': 'XMLHttpRequest',
                    'Cookie': 'csrftoken=%s' % self._TOKEN,
                })
            if data.get('success') is False:
                break
            html = data.get('html')
            if not html:
                break
            video_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                html)
            if not video_ids:
                break
            offset += len(video_ids)
            for video_id in video_ids:
                yield self.url_result(
                    'https://www.bitchute.com/video/%s' % video_id,
                    ie=BitChuteIE.ie_key(), video_id=video_id)
    }

    @staticmethod
    def _make_url(playlist_id, playlist_type):
        return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'

    def _fetch_page(self, playlist_id, playlist_type, page_num):
        playlist_url = self._make_url(playlist_id, playlist_type)
        data = self._download_json(
            f'{playlist_url}extend/', playlist_id, f'Downloading page {page_num}',
            data=urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': page_num * self.PAGE_SIZE,
            }), headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': playlist_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': f'csrftoken={self._TOKEN}',
            })
        if not data.get('success'):
            return
        classes = self.HTML_CLASS_NAMES[playlist_type]
        for video_html in get_elements_html_by_class(classes['container'], data.get('html')):
            video_id = self._search_regex(
                r'<a\s[^>]*\bhref=["\']/video/([^"\'/]+)', video_html, 'video id', default=None)
            if not video_id:
                continue
            yield self.url_result(
                f'https://www.bitchute.com/video/{video_id}', BitChuteIE, video_id, url_transparent=True,
                title=clean_html(get_element_by_class(classes['title'], video_html)),
                description=clean_html(get_element_by_class(classes['description'], video_html)),
                duration=parse_duration(get_element_by_class('video-duration', video_html)),
                view_count=parse_count(clean_html(get_element_by_class('video-views', video_html))))

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        playlist_type, playlist_id = self._match_valid_url(url).group('type', 'id')
        webpage = self._download_webpage(self._make_url(playlist_id, playlist_type), playlist_id)

        page_func = functools.partial(self._fetch_page, playlist_id, playlist_type)
        return self.playlist_result(
            self._entries(channel_id), playlist_id=channel_id)
            OnDemandPagedList(page_func, self.PAGE_SIZE), playlist_id,
            title=self._html_extract_title(webpage, default=None),
            description=self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage, default=None),
            playlist_count=int_or_none(self._html_search_regex(
                r'<span>(\d+)\s+videos?</span>', webpage, 'playlist count', default=None)))

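Channel and playlist pagination now goes through `OnDemandPagedList`, which fetches pages lazily as entries are consumed. A minimal sketch of the contract (the page function receives a 0-based page number and returns that page's entries):

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 25

def fetch_page(page_num):
    start = page_num * PAGE_SIZE
    return list(range(start, start + PAGE_SIZE))  # stand-in for one page of results

pages = OnDemandPagedList(fetch_page, PAGE_SIZE)
print(pages.getslice(0, 30))  # only pages 0 and 1 are ever fetched
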
@@ -27,8 +27,7 @@ class BreitBartIE(InfoExtractor):
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': (self._og_search_title(webpage, default=None)
                      or self._html_extract_title(webpage, 'video title')),
            'title': self._generic_title('', webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': self._rta_search(webpage),

@@ -51,9 +51,7 @@ class CallinIE(InfoExtractor):
        episode = next_data['props']['pageProps']['episode']

        id = episode['id']
        title = (episode.get('title')
                 or self._og_search_title(webpage, fatal=False)
                 or self._html_extract_title(webpage))
        title = episode.get('title') or self._generic_title('', webpage)
        url = episode['m3u8']
        formats = self._extract_m3u8_formats(url, display_id, ext='ts')
        self._sort_formats(formats)

59 yt_dlp/extractor/camsoda.py Normal file
@@ -0,0 +1,59 @@
import random

from .common import InfoExtractor
from ..utils import ExtractorError, traverse_obj


class CamsodaIE(InfoExtractor):
    _VALID_URL = r'https?://www\.camsoda\.com/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.camsoda.com/lizzhopf',
        'info_dict': {
            'id': 'lizzhopf',
            'ext': 'mp4',
            'title': 'lizzhopf (lizzhopf) Nude on Cam. Free Live Sex Chat Room - CamSoda',
            'description': str,
            'is_live': True,
            'age_limit': 18,
        },
        'skip': 'Room is offline',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())

        data = self._download_json(
            f'https://camsoda.com/api/v1/video/vtoken/{video_id}', video_id,
            query={'username': f'guest_{random.randrange(10000, 99999)}'},
            headers=self.geo_verification_headers())
        if not data:
            raise ExtractorError('Unable to find configuration for stream.')
        elif data.get('private_servers'):
            raise ExtractorError('Model is in private show.', expected=True)
        elif not data.get('stream_name'):
            raise ExtractorError('Model is offline.', expected=True)

        stream_name = traverse_obj(data, 'stream_name', expected_type=str)
        token = traverse_obj(data, 'token', expected_type=str)

        formats = []
        for server in traverse_obj(data, ('edge_servers', ...)):
            formats = self._extract_m3u8_formats(
                f'https://{server}/{stream_name}_v1/index.m3u8?token={token}',
                video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
            if formats:
                break
        if not formats:
            self.raise_no_formats('No active streams found', expected=True)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'description': self._html_search_meta('description', webpage, default=None),
            'is_live': True,
            'formats': formats,
            'age_limit': 18,
        }
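For reference, the new extractor's token-and-manifest flow reduces to two URLs, built from fields the vtoken response is expected to carry ('stream_name', 'token', 'edge_servers'); purely illustrative, values made up:

import random

video_id = 'lizzhopf'
token_url = f'https://camsoda.com/api/v1/video/vtoken/{video_id}'
query = {'username': f'guest_{random.randrange(10000, 99999)}'}  # anonymous guest login
# given one edge server plus the returned stream_name/token, the HLS manifest is:
server, stream_name, token = 'edge1.example.invalid', 'stream', 'tok'
manifest = f'https://{server}/{stream_name}_v1/index.m3u8?token={token}'
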
@@ -1,4 +1,8 @@
import base64
import codecs
import datetime
import hashlib
import hmac
import json
import re

@@ -12,6 +16,8 @@ from ..utils import (
    multipart_encode,
    parse_duration,
    random_birthday,
    traverse_obj,
    try_call,
    try_get,
    urljoin,
)
@@ -19,7 +25,18 @@ from ..utils import (

class CDAIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
    _NETRC_MACHINE = 'cdapl'

    _BASE_URL = 'http://www.cda.pl/'
    _BASE_API_URL = 'https://api.cda.pl'
    _API_HEADERS = {
        'Accept': 'application/vnd.cda.public+json',
        'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)',
    }
    # hardcoded in the app
    _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
    _BEARER_CACHE = 'cda-bearer'

    _TESTS = [{
        'url': 'http://www.cda.pl/video/5749950c',
        'md5': '6f844bf51b15f31fae165365707ae970',
@@ -83,8 +100,73 @@ class CDAIE(InfoExtractor):
            'Content-Type': content_type,
        }, **kwargs)

    def _perform_login(self, username, password):
        cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
        if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
            self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
            return

        password_hash = base64.urlsafe_b64encode(hmac.new(
            b's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
            ''.join(f'{bytes((bt & 255, )).hex():0>2}'
                    for bt in hashlib.md5(password.encode()).digest()).encode(),
            hashlib.sha256).digest()).decode().replace('=', '')

        token_res = self._download_json(
            f'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data=b'',
            headers={**self._API_HEADERS, 'Authorization': self._LOGIN_REQUEST_AUTH},
            query={
                'grant_type': 'password',
                'login': username,
                'password': password_hash,
            })
        self.cache.store(self._BEARER_CACHE, username, {
            'token': token_res['access_token'],
            'valid_until': token_res['expires_in'] + datetime.datetime.now().timestamp(),
        })
        self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'

    def _real_extract(self, url):
        video_id = self._match_id(url)

        if 'Authorization' in self._API_HEADERS:
            return self._api_extract(video_id)
        else:
            return self._web_extract(video_id, url)

    def _api_extract(self, video_id):
        meta = self._download_json(
            f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']

        if meta.get('premium') and not meta.get('premium_free'):
            self.report_drm(video_id)

        uploader = traverse_obj(meta, 'author', 'login')

        formats = [{
            'url': quality['file'],
            'format': quality.get('title'),
            'resolution': quality.get('name'),
            'height': try_call(lambda: int(quality['name'][:-1])),
            'filesize': quality.get('length'),
        } for quality in meta['qualities'] if quality.get('file')]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta.get('title'),
            'description': meta.get('description'),
            'uploader': None if uploader == 'anonim' else uploader,
            'average_rating': float_or_none(meta.get('rating')),
            'thumbnail': meta.get('thumb'),
            'formats': formats,
            'duration': meta.get('duration'),
            'age_limit': 18 if meta.get('for_adults') else 0,
            'view_count': meta.get('views'),
        }

    def _web_extract(self, video_id, url):
        self._set_cookie('cda.pl', 'cda.player', 'html5')
        webpage = self._download_webpage(
            self._BASE_URL + '/video/' + video_id, video_id)

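A side note on the login hash above: the inner join over `hashlib.md5(...).digest()` bytes is just a verbose spelling of the hex digest, so the whole computation reduces to HMAC-SHA256 over the MD5 hexdigest, base64url-encoded without padding. An equivalent, more readable form (same secret as in the diff):

import base64
import hashlib
import hmac

SECRET = b's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P'

def cda_password_hash(password):
    # equivalent to the diff's join-over-digest-bytes construction
    md5_hex = hashlib.md5(password.encode()).hexdigest().encode()
    digest = hmac.new(SECRET, md5_hex, hashlib.sha256).digest()
    return base64.urlsafe_b64encode(digest).decode().replace('=', '')
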
@@ -9,6 +9,7 @@ from ..utils import (
    ExtractorError,
    float_or_none,
    sanitized_Request,
    str_or_none,
    traverse_obj,
    urlencode_postdata,
    USER_AGENTS,
@@ -16,13 +17,13 @@ from ..utils import (


class CeskaTelevizeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'title': 'Bonus 01 - En - Hyde Park Civilizace',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
@@ -33,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor):
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'url': 'http://www.ceskatelevize.cz/zive/ct1/',
        'info_dict': {
            'id': 402,
            'id': '102',
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'title': r'ČT1 - živé vysílání online',
            'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        # another
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'only_matching': True,
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        # 'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
@@ -53,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor):
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko',
            'title': 'Bogotart - Queer',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494877311053',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'title': 'Bogotart - Queer (Varování 18+)',
                'duration': 11.9,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'title': 'Bogotart - Queer (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
@@ -84,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor):

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        parsed_url = compat_urllib_parse_urlparse(url)
        webpage = self._download_webpage(url, playlist_id)
        site_name = self._og_search_property('site_name', webpage, fatal=False, default=None)
        webpage, urlh = self._download_webpage_handle(url, playlist_id)
        parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
        site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
            playlist_title = playlist_title.replace(f' — {site_name}', '', 1)
            playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
        playlist_description = self._og_search_description(webpage, default=None)
        if playlist_description:
            playlist_description = playlist_description.replace('\xa0', ' ')

        if parsed_url.path.startswith('/porady/'):
            type_ = 'IDEC'
        if re.search(r'(^/porady|/zive)/', parsed_url.path):
            next_data = self._search_nextjs_data(webpage, playlist_id)
            idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
            if '/zive/' in parsed_url.path:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
            else:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
                if not idec:
                    idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
                    if idec:
                        type_ = 'bonus'
            if not idec:
                raise ExtractorError('Failed to find IDEC id')
            iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
            webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
                query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
            iframe_hash = self._download_webpage(
                'https://www.ceskatelevize.cz/v-api/iframe-hash/',
                playlist_id, note='Getting IFRAME hash')
            query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
            webpage = self._download_webpage(
                'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
                playlist_id, note='Downloading player', query=query)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
            self.raise_geo_restricted(NOT_AVAILABLE_STRING)
        if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
            raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)

        type_ = None
        episode_id = None
@@ -174,7 +200,6 @@ class CeskaTelevizeIE(InfoExtractor):
        is_live = item.get('type') == 'LIVE'
        formats = []
        for format_id, stream_url in item.get('streamUrls', {}).items():
            stream_url = stream_url.replace('https://', 'http://')
            if 'playerType=flash' in stream_url:
                stream_formats = self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4', 'm3u8_native',
@@ -196,7 +221,7 @@ class CeskaTelevizeIE(InfoExtractor):
            entries[num]['formats'].extend(formats)
            continue

            item_id = item.get('id') or item['assetId']
|
||||
item_id = str_or_none(item.get('id') or item['assetId'])
|
||||
title = item['title']
|
||||
|
||||
duration = float_or_none(item.get('duration'))
|
||||
@@ -227,6 +252,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
for e in entries:
|
||||
self._sort_formats(e['formats'])
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def _get_subtitles(self, episode_id, subs):
|
||||
|
||||
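# --- Illustrative sketch (not part of the diff): how the new CeskaTelevizeIE
# code walks the Next.js payload with yt_dlp.utils.traverse_obj. The sample
# payload below is invented; the traversal path is the one used above.
from yt_dlp.utils import traverse_obj

next_data = {'props': {'pageProps': {'data': {
    'mediaMeta': {'idec': '217 562 22150/0004'},
}}}}

# A tuple inside the path, ('show', 'mediaMeta'), branches over both keys;
# get_all=False returns the first value found instead of a list of matches.
idec = traverse_obj(
    next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'),
    get_all=False)
assert idec == '217 562 22150/0004'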
61 yt_dlp/extractor/cinetecamilano.py Normal file
@@ -0,0 +1,61 @@
import json
import urllib.error
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    parse_iso8601,
    strip_or_none,
    traverse_obj,
    try_get,
    urljoin,
)


class CinetecaMilanoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?cinetecamilano\.it/film/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cinetecamilano.it/film/1942',
        'info_dict': {
            'id': '1942',
            'ext': 'mp4',
            'title': 'Il draghetto Gris\u00f9 (4 episodi)',
            'release_date': '20220129',
            'thumbnail': r're:.+\.png',
            'description': 'md5:5328cbe080b93224712b6f17fcaf2c01',
            'modified_date': '20200520',
            'duration': 3139,
            'release_timestamp': 1643446208,
            'modified_timestamp': int
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        try:
            film_json = self._download_json(
                f'https://www.cinetecamilano.it/api/catalogo/{video_id}/?',
                video_id, headers={
                    'Referer': url,
                    'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
                })
        except ExtractorError as e:
            if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
                    or isinstance(e.cause, json.JSONDecodeError)):
                self.raise_login_required(method='cookies')
            raise
        if not film_json.get('success') or not film_json.get('archive'):
            raise ExtractorError('Video information not found')
        archive = film_json['archive']

        return {
            'id': video_id,
            'title': archive.get('title'),
            'description': strip_or_none(archive.get('description')),
            'duration': float_or_none(archive.get('duration'), invscale=60),
            'release_timestamp': parse_iso8601(archive.get('updated_at'), delimiter=' '),
            'modified_timestamp': parse_iso8601(archive.get('created_at'), delimiter=' '),
            'thumbnail': urljoin(url, try_get(archive, lambda x: x['thumb']['src'].replace('/public/', '/storage/'))),
            'formats': self._extract_m3u8_formats(
                urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4')
        }
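# --- Illustrative sketch (not part of the diff): CinetecaMilanoIE above turns
# a site cookie into a Bearer Authorization header. http.cookies mirrors the
# Morsel mapping that _get_cookies returns; the cookie value is invented.
from http.cookies import SimpleCookie

cookies = SimpleCookie()
cookies['cnt-token'] = 'abc123'

# Same fallback as the extractor: empty header when 'cnt-token' is missing,
# which later trips the HTTP 500 / JSON-error path and raise_login_required.
auth = f'Bearer {cookies["cnt-token"].value}' if 'cnt-token' in cookies else ''
assert auth == 'Bearer abc123'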
@@ -66,6 +66,7 @@ from ..utils import (
     sanitize_filename,
     sanitize_url,
     sanitized_Request,
+    smuggle_url,
     str_or_none,
     str_to_int,
     strip_or_none,
@@ -284,6 +285,7 @@ class InfoExtractor:
                         captions instead of normal subtitles
     duration:       Length of the video in seconds, as an integer or float.
     view_count:     How many users have watched the video on the platform.
+    concurrent_view_count: How many users are currently watching the video on the platform.
     like_count:     Number of positive ratings of the video
     dislike_count:  Number of negative ratings of the video
     repost_count:   Number of reposts of the video
@@ -1106,7 +1108,9 @@ class InfoExtractor:
             return self._downloader.params.get(name, default, *args, **kwargs)
         return default

-    def report_drm(self, video_id, partial=False):
+    def report_drm(self, video_id, partial=NO_DEFAULT):
+        if partial is not NO_DEFAULT:
+            self._downloader.deprecation_warning('InfoExtractor.report_drm no longer accepts the argument partial')
         self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)

     def report_extraction(self, id_or_name):
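# --- Illustrative sketch (not part of the diff): the report_drm change above
# uses a sentinel default so that *any* explicitly passed value - even False -
# can trigger a deprecation warning. A minimal standalone version:
import warnings

NO_DEFAULT = object()  # unique sentinel; no caller can pass it by accident

def report_drm(video_id, partial=NO_DEFAULT):
    if partial is not NO_DEFAULT:
        warnings.warn('report_drm no longer accepts the argument partial',
                      DeprecationWarning, stacklevel=2)
    print(f'{video_id}: This video is DRM protected')

report_drm('abc')                 # no warning
report_drm('abc', partial=False)  # warns, even though False was the old default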
@@ -1466,10 +1470,6 @@ class InfoExtractor:
         if not json_ld:
             return {}
         info = {}
-        if not isinstance(json_ld, (list, tuple, dict)):
-            return info
-        if isinstance(json_ld, dict):
-            json_ld = [json_ld]

         INTERACTION_TYPE_MAP = {
             'CommentAction': 'comment',
@@ -1569,12 +1569,14 @@ class InfoExtractor:
             extract_chapter_information(e)

         def traverse_json_ld(json_ld, at_top_level=True):
-            for e in json_ld:
+            for e in variadic(json_ld):
                 if not isinstance(e, dict):
                     continue
                 if at_top_level and '@context' not in e:
                     continue
                 if at_top_level and set(e.keys()) == {'@context', '@graph'}:
-                    traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False)
-                    break
+                    traverse_json_ld(e['@graph'], at_top_level=False)
+                    continue
                 if expected_type is not None and not is_type(e, expected_type):
                     continue
                 rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
@@ -1628,8 +1630,8 @@ class InfoExtractor:
                 continue
             else:
                 break
-        traverse_json_ld(json_ld)

+        traverse_json_ld(json_ld)
         return filter_dict(info)

     def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
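# --- Illustrative sketch (not part of the diff): the shape of payload that
# the @graph handling in traverse_json_ld deals with - publishers often nest
# every JSON-LD object under one {'@context': ..., '@graph': [...]} wrapper.
json_ld = {
    '@context': 'https://schema.org',
    '@graph': [
        {'@type': 'BreadcrumbList', 'itemListElement': []},
        {'@type': 'VideoObject', 'name': 'Sample clip', 'duration': 'PT81S'},
    ],
}

def iter_objects(node):
    """Yield every dict in a JSON-LD payload, unwrapping @graph containers."""
    nodes = node if isinstance(node, list) else [node]
    for e in nodes:
        if not isinstance(e, dict):
            continue
        if set(e) == {'@context', '@graph'}:
            yield from iter_objects(e['@graph'])
            continue
        yield e

print([e['@type'] for e in iter_objects(json_ld)])
# ['BreadcrumbList', 'VideoObject']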
@@ -1645,7 +1647,10 @@ class InfoExtractor:
         FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
         js, arg_keys, arg_vals = self._search_regex(
             (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
-            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal)
+            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
+            default=NO_DEFAULT if fatal else (None, None, None))
+        if js is None:
+            return {}

         args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
@@ -3723,7 +3728,8 @@ class InfoExtractor:
         if not cls.working():
             desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'

-        name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME
+        # Escape emojis. Ref: https://github.com/github/markup/issues/1153
+        name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
         return f'{name}:{desc}' if desc else name

     def extract_subtitles(self, *args, **kwargs):
@@ -3735,6 +3741,9 @@ class InfoExtractor:
     def _get_subtitles(self, *args, **kwargs):
         raise NotImplementedError('This method must be implemented by subclasses')

+    class CommentsDisabled(Exception):
+        """Raise in _get_comments if comments are disabled for the video"""
+
     def extract_comments(self, *args, **kwargs):
         if not self.get_param('getcomments'):
             return None
@@ -3750,6 +3759,8 @@ class InfoExtractor:
                     interrupted = False
                 except KeyboardInterrupt:
                     self.to_screen('Interrupted by user')
+                except self.CommentsDisabled:
+                    return {'comments': None, 'comment_count': None}
                 except Exception as e:
                     if self.get_param('ignoreerrors') is not True:
                         raise
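# --- Illustrative sketch (not part of the diff): CommentsDisabled is a
# sentinel exception - subclasses raise it inside _get_comments and the base
# class catches it in extract_comments. Standalone version of the pattern:
class CommentsDisabled(Exception):
    """Raise in _get_comments if comments are disabled for the video"""

def get_comments(video_id):
    raise CommentsDisabled  # e.g. the site reports comments turned off

def extract_comments(video_id):
    try:
        comments = get_comments(video_id)
        return {'comments': comments, 'comment_count': len(comments)}
    except CommentsDisabled:
        # disabled comments are not an error - report them as absent
        return {'comments': None, 'comment_count': None}

print(extract_comments('abc'))  # {'comments': None, 'comment_count': None}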
@@ -3818,9 +3829,11 @@ class InfoExtractor:
     def _generic_id(url):
         return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])

-    @staticmethod
-    def _generic_title(url):
-        return urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
+    def _generic_title(self, url='', webpage='', *, default=None):
+        return (self._og_search_title(webpage, default=None)
+                or self._html_extract_title(webpage, default=None)
+                or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
+                or default)

     @staticmethod
     def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
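# --- Illustrative sketch (not part of the diff): the reworked _generic_title
# is an `or`-chain of fallbacks - og:title, then <title>, then the URL
# basename. The URL-only leg can be reproduced with the standard library:
import os
import urllib.parse

def title_from_url(url):
    # strip the path, the extension and any percent-encoding
    return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])

print(title_from_url('https://example.com/media/My%20Clip.mp4'))  # My Clip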
@@ -3843,8 +3856,8 @@ class InfoExtractor:
         @param default The default value to return when the key is not present (default: [])
         @param casesense When false, the values are converted to lower case
         '''
-        val = traverse_obj(
-            self._downloader.params, ('extractor_args', (ie_key or self.ie_key()).lower(), key))
+        ie_key = ie_key if isinstance(ie_key, str) else (ie_key or self).ie_key()
+        val = traverse_obj(self._downloader.params, ('extractor_args', ie_key.lower(), key))
         if val is None:
             return [] if default is NO_DEFAULT else default
         return list(val) if casesense else [x.lower() for x in val]
@@ -3874,6 +3887,12 @@ class InfoExtractor:
     def RetryManager(self, **kwargs):
         return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)

+    def _extract_generic_embeds(self, url, *args, info_dict={}, note='Extracting generic embeds', **kwargs):
+        display_id = traverse_obj(info_dict, 'display_id', 'id')
+        self.to_screen(f'{format_field(display_id, None, "%s: ")}{note}')
+        return self._downloader.get_info_extractor('Generic')._extract_embeds(
+            smuggle_url(url, {'block_ies': [self.ie_key()]}), *args, **kwargs)
+
     @classmethod
     def extract_from_webpage(cls, ydl, url, webpage):
         ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
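# --- Illustrative sketch (not part of the diff): _configuration_arg now
# accepts an ie_key that is either a string or an extractor instance/class.
# The dispatch line can be mimicked standalone (names here are invented):
class FakeIE:
    @classmethod
    def ie_key(cls):
        return 'Fake'

def resolve_ie_key(ie_key, current):
    # strings pass through; None falls back to the current extractor
    return ie_key if isinstance(ie_key, str) else (ie_key or current).ie_key()

assert resolve_ie_key('Youtube', FakeIE()) == 'Youtube'
assert resolve_ie_key(None, FakeIE()) == 'Fake'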
@@ -1,40 +1,16 @@
 import base64
-import json
-import re
-import urllib.request
-import xml.etree.ElementTree
-import zlib
-from hashlib import sha1
-from math import floor, pow, sqrt
+import urllib.parse

 from .common import InfoExtractor
-from .vrv import VRVBaseIE
-from ..aes import aes_cbc_decrypt
-from ..compat import (
-    compat_b64decode,
-    compat_etree_fromstring,
-    compat_str,
-    compat_urllib_parse_urlencode,
-    compat_urlparse,
-)
 from ..utils import (
     ExtractorError,
-    bytes_to_intlist,
-    extract_attributes,
     float_or_none,
     format_field,
     int_or_none,
-    intlist_to_bytes,
     join_nonempty,
-    lowercase_escape,
-    merge_dicts,
     parse_iso8601,
     qualities,
-    remove_end,
-    sanitized_Request,
     traverse_obj,
     try_get,
-    xpath_text,
 )
@@ -42,16 +18,7 @@ class CrunchyrollBaseIE(InfoExtractor):
     _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login'
     _API_BASE = 'https://api.crunchyroll.com'
     _NETRC_MACHINE = 'crunchyroll'

-    def _call_rpc_api(self, method, video_id, note=None, data=None):
-        data = data or {}
-        data['req'] = 'RpcApi' + method
-        data = compat_urllib_parse_urlencode(data).encode('utf-8')
-        return self._download_xml(
-            'https://www.crunchyroll.com/xml/',
-            video_id, note, fatal=False, data=data, headers={
-                'Content-Type': 'application/x-www-form-urlencoded',
-            })
+    params = None

     def _perform_login(self, username, password):
         if self._get_cookies(self._LOGIN_URL).get('etp_rt'):
@@ -72,7 +39,7 @@ class CrunchyrollBaseIE(InfoExtractor):

         login_response = self._download_json(
             f'{self._API_BASE}/login.1.json', None, 'Logging in',
-            data=compat_urllib_parse_urlencode({
+            data=urllib.parse.urlencode({
                 'account': username,
                 'password': password,
                 'session_id': session_id
@@ -82,652 +49,23 @@ class CrunchyrollBaseIE(InfoExtractor):
         if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
             raise ExtractorError('Login succeeded but did not set etp_rt cookie')

-    # Beta-specific, but needed for redirects
-    def _get_beta_embedded_json(self, webpage, display_id):
+    def _get_embedded_json(self, webpage, display_id):
         initial_state = self._parse_json(self._search_regex(
             r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id)
         app_config = self._parse_json(self._search_regex(
             r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id)
         return initial_state, app_config

-    def _redirect_to_beta(self, webpage, iekey, video_id):
-        if not self._get_cookies(self._LOGIN_URL).get('etp_rt'):
-            raise ExtractorError('Received a beta page from non-beta url when not logged in.')
-        initial_state, app_config = self._get_beta_embedded_json(webpage, video_id)
-        url = app_config['baseSiteUrl'] + initial_state['router']['locations']['current']['pathname']
-        self.to_screen(f'{video_id}: Redirected to beta site - {url}')
-        return self.url_result(f'{url}', iekey, video_id)
-
-    @staticmethod
-    def _add_skip_wall(url):
-        parsed_url = compat_urlparse.urlparse(url)
-        qs = compat_urlparse.parse_qs(parsed_url.query)
-        # Always force skip_wall to bypass maturity wall, namely 18+ confirmation message:
-        # > This content may be inappropriate for some people.
-        # > Are you sure you want to continue?
-        # since it's not disabled by default in crunchyroll account's settings.
-        # See https://github.com/ytdl-org/youtube-dl/issues/7202.
-        qs['skip_wall'] = ['1']
-        return compat_urlparse.urlunparse(
-            parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
-    IE_NAME = 'crunchyroll'
-    _VALID_URL = r'''(?x)
-        https?://(?:(?P<prefix>www|m)\.)?(?P<url>
-            crunchyroll\.(?:com|fr)/(?:
-                media(?:-|/\?id=)|
-                (?!series/|watch/)(?:[^/]+/){1,2}[^/?&#]*?
-            )(?P<id>[0-9]+)
-        )(?:[/?&#]|$)'''
-
-    _TESTS = [{
-        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
-        'info_dict': {
-            'id': '645513',
-            'ext': 'mp4',
-            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
-            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
-            'upload_date': '20131013',
-            'url': 're:(?!.*&)',
-        },
-        'params': {
-            # rtmp
-            'skip_download': True,
-        },
-        'skip': 'Video gone',
-    }, {
-        'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
-        'info_dict': {
-            'id': '589804',
-            'ext': 'flv',
-            'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
-            'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'Danny Choo Network',
-            'upload_date': '20120213',
-        },
-        'params': {
-            # rtmp
-            'skip_download': True,
-        },
-        'skip': 'Video gone',
-    }, {
-        'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
-        'info_dict': {
-            'id': '702409',
-            'ext': 'mp4',
-            'title': compat_str,
-            'description': compat_str,
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'Re:Zero Partners',
-            'timestamp': 1462098900,
-            'upload_date': '20160501',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
-        'info_dict': {
-            'id': '727589',
-            'ext': 'mp4',
-            'title': compat_str,
-            'description': compat_str,
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'Kadokawa Pictures Inc.',
-            'timestamp': 1484130900,
-            'upload_date': '20170111',
-            'series': compat_str,
-            'season': "KONOSUBA -God's blessing on this wonderful world! 2",
-            'season_number': 2,
-            'episode': 'Give Me Deliverance From This Judicial Injustice!',
-            'episode_number': 1,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
-        'only_matching': True,
-    }, {
-        # geo-restricted (US), 18+ maturity wall, non-premium available
-        'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
-        'only_matching': True,
-    }, {
-        # A description with double quotes
-        'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
-        'info_dict': {
-            'id': '535080',
-            'ext': 'mp4',
-            'title': compat_str,
-            'description': compat_str,
-            'uploader': 'Marvelous AQL Inc.',
-            'timestamp': 1255512600,
-            'upload_date': '20091014',
-        },
-        'params': {
-            # Just test metadata extraction
-            'skip_download': True,
-        },
-    }, {
-        # make sure we can extract an uploader name that's not a link
-        'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
-        'info_dict': {
-            'id': '606899',
-            'ext': 'mp4',
-            'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
-            'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
-            'uploader': 'Geneon Entertainment',
-            'upload_date': '20120717',
-        },
-        'params': {
-            # just test metadata extraction
-            'skip_download': True,
-        },
-        'skip': 'Video gone',
-    }, {
-        # A video with a vastly different season name compared to the series name
-        'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
-        'info_dict': {
-            'id': '590532',
-            'ext': 'mp4',
-            'title': compat_str,
-            'description': compat_str,
-            'uploader': 'TV TOKYO',
-            'timestamp': 1330956000,
-            'upload_date': '20120305',
-            'series': 'Nyarko-san: Another Crawling Chaos',
-            'season': 'Haiyoru! Nyaruani (ONA)',
-        },
-        'params': {
-            # Just test metadata extraction
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://www.crunchyroll.com/media-723735',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
-        'only_matching': True,
-    }]
-
-    _FORMAT_IDS = {
-        '360': ('60', '106'),
-        '480': ('61', '106'),
-        '720': ('62', '106'),
-        '1080': ('80', '108'),
-    }
-
-    def _download_webpage(self, url_or_request, *args, **kwargs):
-        request = (url_or_request if isinstance(url_or_request, urllib.request.Request)
-                   else sanitized_Request(url_or_request))
-        # Accept-Language must be set explicitly to accept any language to avoid issues
-        # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
-        # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
-        # should be imposed or not (from what I can see it just takes the first language
-        # ignoring the priority and requires it to correspond the IP). By the way this causes
-        # Crunchyroll to not work in georestriction cases in some browsers that don't place
-        # the locale lang first in header. However allowing any language seems to workaround the issue.
-        request.add_header('Accept-Language', '*')
-        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-    def _decrypt_subtitles(self, data, iv, id):
-        data = bytes_to_intlist(compat_b64decode(data))
-        iv = bytes_to_intlist(compat_b64decode(iv))
-        id = int(id)
-
-        def obfuscate_key_aux(count, modulo, start):
-            output = list(start)
-            for _ in range(count):
-                output.append(output[-1] + output[-2])
-            # cut off start values
-            output = output[2:]
-            output = list(map(lambda x: x % modulo + 33, output))
-            return output
-
-        def obfuscate_key(key):
-            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
-            num2 = (num1 ^ key) << 5
-            num3 = key ^ num1
-            num4 = num3 ^ (num3 >> 3) ^ num2
-            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
-            shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
-            # Extend 160 Bit hash to 256 Bit
-            return shaHash + [0] * 12
-
-        key = obfuscate_key(id)
-
-        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
-        return zlib.decompress(decrypted_data)
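# --- Illustrative sketch (not part of the diff): the legacy _decrypt_subtitles
# removed above derives its AES key from the subtitle id via a Fibonacci-style
# sequence (mod 97, offset 33) hashed through SHA-1 and zero-padded from
# 160 to 256 bits. A standalone version of that key derivation:
from hashlib import sha1
from math import floor, sqrt

def obfuscate_key(key_id):
    # Fibonacci-like prefix: extend (1, 2), keep each value in the 33..129 range
    seq = [1, 2]
    for _ in range(20):
        seq.append(seq[-1] + seq[-2])
    prefix = bytes(x % 97 + 33 for x in seq[2:])

    num1 = int(floor(2 ** 25 * sqrt(6.9)))  # magic constant from the player
    num3 = key_id ^ num1
    num4 = num3 ^ (num3 >> 3) ^ ((num1 ^ key_id) << 5)
    # SHA-1 gives 160 bits; pad with 12 zero bytes to reach a 256-bit AES key
    return sha1(prefix + str(num4).encode('ascii')).digest() + bytes(12)

print(len(obfuscate_key(123456)))  # 32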
-    def _convert_subtitles_to_srt(self, sub_root):
-        output = ''
-
-        for i, event in enumerate(sub_root.findall('./events/event'), 1):
-            start = event.attrib['start'].replace('.', ',')
-            end = event.attrib['end'].replace('.', ',')
-            text = event.attrib['text'].replace('\\N', '\n')
-            output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
-        return output
-
-    def _convert_subtitles_to_ass(self, sub_root):
-        output = ''
-
-        def ass_bool(strvalue):
-            assvalue = '0'
-            if strvalue == '1':
-                assvalue = '-1'
-            return assvalue
-
-        output = '[Script Info]\n'
-        output += 'Title: %s\n' % sub_root.attrib['title']
-        output += 'ScriptType: v4.00+\n'
-        output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
-        output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
-        output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
-        output += """
-[V4+ Styles]
-Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-"""
-        for style in sub_root.findall('./styles/style'):
-            output += 'Style: ' + style.attrib['name']
-            output += ',' + style.attrib['font_name']
-            output += ',' + style.attrib['font_size']
-            output += ',' + style.attrib['primary_colour']
-            output += ',' + style.attrib['secondary_colour']
-            output += ',' + style.attrib['outline_colour']
-            output += ',' + style.attrib['back_colour']
-            output += ',' + ass_bool(style.attrib['bold'])
-            output += ',' + ass_bool(style.attrib['italic'])
-            output += ',' + ass_bool(style.attrib['underline'])
-            output += ',' + ass_bool(style.attrib['strikeout'])
-            output += ',' + style.attrib['scale_x']
-            output += ',' + style.attrib['scale_y']
-            output += ',' + style.attrib['spacing']
-            output += ',' + style.attrib['angle']
-            output += ',' + style.attrib['border_style']
-            output += ',' + style.attrib['outline']
-            output += ',' + style.attrib['shadow']
-            output += ',' + style.attrib['alignment']
-            output += ',' + style.attrib['margin_l']
-            output += ',' + style.attrib['margin_r']
-            output += ',' + style.attrib['margin_v']
-            output += ',' + style.attrib['encoding']
-            output += '\n'
-
-        output += """
-[Events]
-Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
-"""
-        for event in sub_root.findall('./events/event'):
-            output += 'Dialogue: 0'
-            output += ',' + event.attrib['start']
-            output += ',' + event.attrib['end']
-            output += ',' + event.attrib['style']
-            output += ',' + event.attrib['name']
-            output += ',' + event.attrib['margin_l']
-            output += ',' + event.attrib['margin_r']
-            output += ',' + event.attrib['margin_v']
-            output += ',' + event.attrib['effect']
-            output += ',' + event.attrib['text']
-            output += '\n'
-
-        return output
-
-    def _extract_subtitles(self, subtitle):
-        sub_root = compat_etree_fromstring(subtitle)
-        return [{
-            'ext': 'srt',
-            'data': self._convert_subtitles_to_srt(sub_root),
-        }, {
-            'ext': 'ass',
-            'data': self._convert_subtitles_to_ass(sub_root),
-        }]
-
-    def _get_subtitles(self, video_id, webpage):
-        subtitles = {}
-        for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
-            sub_doc = self._call_rpc_api(
-                'Subtitle_GetXml', video_id,
-                'Downloading subtitles for ' + sub_name, data={
-                    'subtitle_script_id': sub_id,
-                })
-            if not isinstance(sub_doc, xml.etree.ElementTree.Element):
-                continue
-            sid = sub_doc.get('id')
-            iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
-            data = xpath_text(sub_doc, 'data', 'subtitle data')
-            if not sid or not iv or not data:
-                continue
-            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
-            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
-            if not lang_code:
-                continue
-            subtitles[lang_code] = self._extract_subtitles(subtitle)
-        return subtitles
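# --- Illustrative sketch (not part of the diff): the removed SRT converter
# turns each <event start=... end=... text=.../> node into a numbered SRT cue,
# swapping '.' for ',' in timestamps and '\N' for real newlines. The sample
# XML is invented:
import xml.etree.ElementTree as ET

xml_subs = '''<subtitle><events>
  <event start="0:00:01.50" end="0:00:03.20" text="Hello\\Nworld"/>
</events></subtitle>'''

root = ET.fromstring(xml_subs)
output = ''
for i, event in enumerate(root.findall('./events/event'), 1):
    start = event.attrib['start'].replace('.', ',')
    end = event.attrib['end'].replace('.', ',')
    text = event.attrib['text'].replace('\\N', '\n')
    output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
print(output)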
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-
-        if mobj.group('prefix') == 'm':
-            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
-            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
-        else:
-            webpage_url = 'http://www.' + mobj.group('url')
-
-        webpage = self._download_webpage(
-            self._add_skip_wall(webpage_url), video_id,
-            headers=self.geo_verification_headers())
-        if re.search(r'<div id="preload-data">', webpage):
-            return self._redirect_to_beta(webpage, CrunchyrollBetaIE.ie_key(), video_id)
-        note_m = self._html_search_regex(
-            r'<div class="showmedia-trailer-notice">(.+?)</div>',
-            webpage, 'trailer-notice', default='')
-        if note_m:
-            raise ExtractorError(note_m, expected=True)
-
-        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
-        if mobj:
-            msg = json.loads(mobj.group('msg'))
-            if msg.get('type') == 'error':
-                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
-
-        if 'To view this, please log in to verify you are 18 or older.' in webpage:
-            self.raise_login_required()
-
-        media = self._parse_json(self._search_regex(
-            r'vilos\.config\.media\s*=\s*({.+?});',
-            webpage, 'vilos media', default='{}'), video_id)
-        media_metadata = media.get('metadata') or {}
-
-        language = self._search_regex(
-            r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
-            webpage, 'language', default=None, group='lang')
-
-        video_title = self._html_search_regex(
-            (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
-             r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
-            webpage, 'video_title', default=None)
-        if not video_title:
-            video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
-        video_title = re.sub(r' {2,}', ' ', video_title)
-        video_description = (self._parse_json(self._html_search_regex(
-            r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
-            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
-
-        thumbnails = []
-        thumbnail_url = (self._parse_json(self._html_search_regex(
-            r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>',
-            webpage, 'thumbnail_url', default='{}'), video_id)).get('image')
-        if thumbnail_url:
-            thumbnails.append({
-                'url': thumbnail_url,
-                'width': 1920,
-                'height': 1080
-            })
-
-        if video_description:
-            video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
-        video_uploader = self._html_search_regex(
-            # try looking for both an uploader that's a link and one that's not
-            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
-            webpage, 'video_uploader', default=False)
-
-        requested_languages = self._configuration_arg('language')
-        requested_hardsubs = [('' if val == 'none' else val) for val in self._configuration_arg('hardsub')]
-        language_preference = qualities((requested_languages or [language or ''])[::-1])
-        hardsub_preference = qualities((requested_hardsubs or ['', language or ''])[::-1])
-
-        formats = []
-        for stream in media.get('streams', []):
-            audio_lang = stream.get('audio_lang') or ''
-            hardsub_lang = stream.get('hardsub_lang') or ''
-            if (requested_languages and audio_lang.lower() not in requested_languages
-                    or requested_hardsubs and hardsub_lang.lower() not in requested_hardsubs):
-                continue
-            vrv_formats = self._extract_vrv_formats(
-                stream.get('url'), video_id, stream.get('format'),
-                audio_lang, hardsub_lang)
-            for f in vrv_formats:
-                f['language_preference'] = language_preference(audio_lang)
-                f['quality'] = hardsub_preference(hardsub_lang)
-            formats.extend(vrv_formats)
-        if not formats:
-            available_fmts = []
-            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
-                attrs = extract_attributes(a)
-                href = attrs.get('href')
-                if href and '/freetrial' in href:
-                    continue
-                available_fmts.append(fmt)
-            if not available_fmts:
-                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
-                    available_fmts = re.findall(p, webpage)
-                    if available_fmts:
-                        break
-            if not available_fmts:
-                available_fmts = self._FORMAT_IDS.keys()
-            video_encode_ids = []
-
-            for fmt in available_fmts:
-                stream_quality, stream_format = self._FORMAT_IDS[fmt]
-                video_format = fmt + 'p'
-                stream_infos = []
-                streamdata = self._call_rpc_api(
-                    'VideoPlayer_GetStandardConfig', video_id,
-                    'Downloading media info for %s' % video_format, data={
-                        'media_id': video_id,
-                        'video_format': stream_format,
-                        'video_quality': stream_quality,
-                        'current_page': url,
-                    })
-                if isinstance(streamdata, xml.etree.ElementTree.Element):
-                    stream_info = streamdata.find('./{default}preload/stream_info')
-                    if stream_info is not None:
-                        stream_infos.append(stream_info)
-                stream_info = self._call_rpc_api(
-                    'VideoEncode_GetStreamInfo', video_id,
-                    'Downloading stream info for %s' % video_format, data={
-                        'media_id': video_id,
-                        'video_format': stream_format,
-                        'video_encode_quality': stream_quality,
-                    })
-                if isinstance(stream_info, xml.etree.ElementTree.Element):
-                    stream_infos.append(stream_info)
-                for stream_info in stream_infos:
-                    video_encode_id = xpath_text(stream_info, './video_encode_id')
-                    if video_encode_id in video_encode_ids:
-                        continue
-                    video_encode_ids.append(video_encode_id)
-
-                    video_file = xpath_text(stream_info, './file')
-                    if not video_file:
-                        continue
-                    if video_file.startswith('http'):
-                        formats.extend(self._extract_m3u8_formats(
-                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
-                            m3u8_id='hls', fatal=False))
-                        continue
-
-                    video_url = xpath_text(stream_info, './host')
-                    if not video_url:
-                        continue
-                    metadata = stream_info.find('./metadata')
-                    format_info = {
-                        'format': video_format,
-                        'height': int_or_none(xpath_text(metadata, './height')),
-                        'width': int_or_none(xpath_text(metadata, './width')),
-                    }
-
-                    if '.fplive.net/' in video_url:
-                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
-                        parsed_video_url = compat_urlparse.urlparse(video_url)
-                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
-                            netloc='v.lvlt.crcdn.net',
-                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
-                        if self._is_valid_url(direct_video_url, video_id, video_format):
-                            format_info.update({
-                                'format_id': 'http-' + video_format,
-                                'url': direct_video_url,
-                            })
-                            formats.append(format_info)
-                            continue
-
-                    format_info.update({
-                        'format_id': 'rtmp-' + video_format,
-                        'url': video_url,
-                        'play_path': video_file,
-                        'ext': 'flv',
-                    })
-                    formats.append(format_info)
-        self._sort_formats(formats)
-
-        metadata = self._call_rpc_api(
-            'VideoPlayer_GetMediaMetadata', video_id,
-            note='Downloading media info', data={
-                'media_id': video_id,
-            })
-
-        subtitles = {}
-        for subtitle in media.get('subtitles', []):
-            subtitle_url = subtitle.get('url')
-            if not subtitle_url:
-                continue
-            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
-                'url': subtitle_url,
-                'ext': subtitle.get('format', 'ass'),
-            })
-        if not subtitles:
-            subtitles = self.extract_subtitles(video_id, webpage)
-
-        # webpage provide more accurate data than series_title from XML
-        series = self._html_search_regex(
-            r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
-            webpage, 'series', fatal=False)
-
-        season = episode = episode_number = duration = None
-
-        if isinstance(metadata, xml.etree.ElementTree.Element):
-            season = xpath_text(metadata, 'series_title')
-            episode = xpath_text(metadata, 'episode_title')
-            episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
-            duration = float_or_none(media_metadata.get('duration'), 1000)
-
-        if not episode:
-            episode = media_metadata.get('title')
-        if not episode_number:
-            episode_number = int_or_none(media_metadata.get('episode_number'))
-        thumbnail_url = try_get(media, lambda x: x['thumbnail']['url'])
-        if thumbnail_url:
-            thumbnails.append({
-                'url': thumbnail_url,
-                'width': 640,
-                'height': 360
-            })
-
-        season_number = int_or_none(self._search_regex(
-            r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
-            webpage, 'season number', default=None))
-
-        info = self._search_json_ld(webpage, video_id, default={})
-
-        return merge_dicts({
-            'id': video_id,
-            'title': video_title,
-            'description': video_description,
-            'duration': duration,
-            'thumbnails': thumbnails,
-            'uploader': video_uploader,
-            'series': series,
-            'season': season,
-            'season_number': season_number,
-            'episode': episode,
-            'episode_number': episode_number,
-            'subtitles': subtitles,
-            'formats': formats,
-        }, info)
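# --- Illustrative sketch (not part of the diff): the removed format-ranking
# code used yt_dlp.utils.qualities for audio-language preference. qualities()
# maps each known value to its index in a list, so reversing the request list
# (as the [::-1] above does) gives the first-requested language the top rank:
from yt_dlp.utils import qualities

requested = ['ja', 'en']                    # 'ja' is the most preferred
language_preference = qualities(requested[::-1])

print(language_preference('ja'))   # 1  (highest)
print(language_preference('en'))   # 0
print(language_preference('de'))   # -1 (unknown values rank lowest)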
-class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
-    IE_NAME = 'crunchyroll:playlist'
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{2}(?:-\w{2})?/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
-
-    _TESTS = [{
-        'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
-        'info_dict': {
-            'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
-            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
-        },
-        'playlist_count': 13,
-    }, {
-        # geo-restricted (US), 18+ maturity wall, non-premium available
-        'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
-        'info_dict': {
-            'id': 'cosplay-complex-ova',
-            'title': 'Cosplay Complex OVA'
-        },
-        'playlist_count': 3,
-        'skip': 'Georestricted',
-    }, {
-        # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
-        'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
-        'only_matching': True,
-    }, {
-        'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        show_id = self._match_id(url)
-
-        webpage = self._download_webpage(
-            # https:// gives a 403, but http:// does not
-            self._add_skip_wall(url).replace('https://', 'http://'), show_id,
-            headers=self.geo_verification_headers())
-        if re.search(r'<div id="preload-data">', webpage):
-            return self._redirect_to_beta(webpage, CrunchyrollBetaShowIE.ie_key(), show_id)
-        title = self._html_search_meta('name', webpage, default=None)
-
-        episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"'
-        season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)'
-        paths = re.findall(f'(?s){episode_re}|{season_re}', webpage)
-
-        entries, current_season = [], None
-        for ep_id, ep, season in paths:
-            if season:
-                current_season = season
-                continue
-            entries.append(self.url_result(
-                f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season))
-
-        return {
-            '_type': 'playlist',
-            'id': show_id,
-            'title': title,
-            'entries': reversed(entries),
-        }
-class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
-    params = None
-
     def _get_params(self, lang):
-        if not CrunchyrollBetaBaseIE.params:
-            if self._get_cookies(f'https://beta.crunchyroll.com/{lang}').get('etp_rt'):
+        if not CrunchyrollBaseIE.params:
+            if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'):
                 grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
             else:
                 grant_type, key = 'client_id', 'anonClientId'

-            initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(
-                f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
-            api_domain = app_config['cxApiParams']['apiDomain']
+            initial_state, app_config = self._get_embedded_json(self._download_webpage(
+                f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
+            api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com')

             auth_response = self._download_json(
                 f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
@@ -739,7 +77,7 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
                 headers={
                     'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
                 })
-            cms = traverse_obj(policy_response, 'cms_beta', 'cms')
+            cms = policy_response.get('cms_web')
             bucket = cms['bucket']
             params = {
                 'Policy': cms['policy'],
@@ -749,19 +87,19 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
             locale = traverse_obj(initial_state, ('localization', 'locale'))
             if locale:
                 params['locale'] = locale
-            CrunchyrollBetaBaseIE.params = (api_domain, bucket, params)
-        return CrunchyrollBetaBaseIE.params
+            CrunchyrollBaseIE.params = (api_domain, bucket, params)
+        return CrunchyrollBaseIE.params
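# --- Illustrative sketch (not part of the diff): _get_params caches the
# token/CMS parameters on the *base* class, so every Crunchyroll extractor in
# the same run shares one authentication round-trip. The caching shape,
# standalone (values are invented):
class ApiBase:
    params = None  # shared across all subclasses

    def get_params(self):
        if not ApiBase.params:
            print('fetching auth params once...')
            ApiBase.params = ('api.example.com', 'bucket', {'Policy': '...'})
        return ApiBase.params

class EpisodeIE(ApiBase): pass
class SeriesIE(ApiBase): pass

EpisodeIE().get_params()  # fetches
SeriesIE().get_params()   # reuses the cached tuple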
-class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
-    IE_NAME = 'crunchyroll:beta'
+class CrunchyrollBetaIE(CrunchyrollBaseIE):
+    IE_NAME = 'crunchyroll'
     _VALID_URL = r'''(?x)
-        https?://beta\.crunchyroll\.com/
+        https?://(?:beta|www)\.crunchyroll\.com/
         (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
         watch/(?P<id>\w+)
         (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
     _TESTS = [{
-        'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
+        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
         'info_dict': {
             'id': 'GY2P1Q98Y',
             'ext': 'mp4',
@@ -777,11 +115,11 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
             'season_number': 1,
             'episode': 'To the Future',
             'episode_number': 73,
-            'thumbnail': r're:^https://beta.crunchyroll.com/imgsrv/.*\.jpeg$',
+            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
         },
         'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
     }, {
-        'url': 'https://beta.crunchyroll.com/watch/GYE5WKQGR',
+        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
         'info_dict': {
             'id': 'GYE5WKQGR',
             'ext': 'mp4',
@@ -797,12 +135,12 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
             'season_number': 1,
             'episode': 'Porter Robinson presents Shelter the Animation',
             'episode_number': 0,
-            'thumbnail': r're:^https://beta.crunchyroll.com/imgsrv/.*\.jpeg$',
+            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$',
         },
         'params': {'skip_download': True},
         'skip': 'Video is Premium only',
     }, {
-        'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y',
+        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
         'only_matching': True,
     }, {
         'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
@@ -901,15 +239,15 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
     }


-class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
-    IE_NAME = 'crunchyroll:playlist:beta'
+class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
+    IE_NAME = 'crunchyroll:playlist'
     _VALID_URL = r'''(?x)
-        https?://beta\.crunchyroll\.com/
+        https?://(?:beta|www)\.crunchyroll\.com/
         (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
         series/(?P<id>\w+)
         (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
     _TESTS = [{
-        'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
+        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
         'info_dict': {
             'id': 'GY19NQ2QR',
             'title': 'Girl Friend BETA',
@@ -942,7 +280,7 @@ class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
             episode_display_id = episode['slug_title']
             yield {
                 '_type': 'url',
-                'url': f'https://beta.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
+                'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}',
                 'ie_key': CrunchyrollBetaIE.ie_key(),
                 'id': episode_id,
                 'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')),
@@ -275,8 +275,7 @@ class CSpanCongressIE(InfoExtractor):
             self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'),
             video_id, transform_source=js_to_json)

-        title = (self._og_search_title(webpage, default=None)
-                 or self._html_extract_title(webpage, 'video title'))
+        title = self._generic_title('', webpage)
         description = (self._og_search_description(webpage, default=None)
                        or self._html_search_meta('description', webpage, 'description', default=None))
@@ -114,18 +114,15 @@ class DetikEmbedIE(InfoExtractor):
     }]

     def _extract_from_webpage(self, url, webpage):
-        display_id = url_basename(url)
         player_type, video_data = self._search_regex(
             r'<script\s*[^>]+src="https?://(aws)?cdn\.detik\.net\.id/(?P<type>flowplayer|detikVideo)[^>]+>\s*(?P<video_data>{[^}]+})',
             webpage, 'playerjs', group=('type', 'video_data'), default=(None, ''))
-
-        json_ld_data = self._search_json_ld(webpage, display_id, default={})
-        extra_info_dict = {}
-
         if not player_type:
             return

-        elif player_type == 'flowplayer':
+        display_id, extra_info_dict = url_basename(url), {}
+
+        if player_type == 'flowplayer':
             video_json_data = self._parse_json(video_data.replace('\'', '"'), display_id)
             video_url = video_json_data['videoUrl']

@@ -151,6 +148,7 @@ class DetikEmbedIE(InfoExtractor):
             formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)
         self._sort_formats(formats)

+        json_ld_data = self._search_json_ld(webpage, display_id, default={})
         yield merge_dicts(json_ld_data, extra_info_dict, {
             'display_id': display_id,
             'title': self._html_search_meta(['og:title', 'originalTitle'], webpage) or self._html_extract_title(webpage),
76 yt_dlp/extractor/deuxm.py Normal file
@@ -0,0 +1,76 @@
from .common import InfoExtractor
from ..utils import url_or_none


class DeuxMIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?2m\.ma/[^/]+/replay/single/(?P<id>([\w.]{1,24})+)'

    _TESTS = [{
        'url': 'https://2m.ma/fr/replay/single/6351d439b15e1a613b3debe8',
        'md5': '5f761f04c9d686e553b685134dca5d32',
        'info_dict': {
            'id': '6351d439b15e1a613b3debe8',
            'ext': 'mp4',
            'title': 'Grand Angle : Jeudi 20 Octobre 2022',
            'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
        }
    }, {
        'url': 'https://2m.ma/fr/replay/single/635c0aeab4eec832622356da',
        'md5': 'ad6af2f5e4d5b2ad2194a84b6e890b4c',
        'info_dict': {
            'id': '635c0aeab4eec832622356da',
            'ext': 'mp4',
            'title': 'Journal Amazigh : Vendredi 28 Octobre 2022',
            'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        video = self._download_json(
            f'https://2m.ma/api/watchDetail/{video_id}', video_id)['response']['News']
        return {
            'id': video_id,
            'title': video.get('titre'),
            'url': video['url'],
            'description': video.get('description'),
            'thumbnail': url_or_none(video.get('image')),
        }


class DeuxMNewsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?2m\.ma/(?P<lang>\w+)/news/(?P<id>[^/#?]+)'

    _TESTS = [{
        'url': 'https://2m.ma/fr/news/Kan-Ya-Mkan-d%C3%A9poussi%C3%A8re-l-histoire-du-phare-du-Cap-Beddouza-20221028',
        'md5': '43d5e693a53fa0b71e8a5204c7d4542a',
        'info_dict': {
            'id': '635c5d1233b83834e35b282e',
            'ext': 'mp4',
            'title': 'Kan Ya Mkan d\u00e9poussi\u00e8re l\u2019histoire du phare du Cap Beddouza',
            'description': 'md5:99dcf29b82f1d7f2a4acafed1d487527',
            'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
        }
    }, {
        'url': 'https://2m.ma/fr/news/Interview-Casablanca-hors-des-sentiers-battus-avec-Abderrahim-KASSOU-Replay--20221017',
        'md5': '7aca29f02230945ef635eb8290283c0c',
        'info_dict': {
            'id': '634d9e108b70d40bc51a844b',
            'ext': 'mp4',
            'title': 'Interview: Casablanca hors des sentiers battus avec Abderrahim KASSOU (Replay) ',
            'description': 'md5:3b8e78111de9fcc6ef7f7dd6cff2430c',
            'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$'
        }
    }]

    def _real_extract(self, url):
        article_name, lang = self._match_valid_url(url).group('id', 'lang')
        video = self._download_json(
            f'https://2m.ma/api/articlesByUrl?lang={lang}&url=/news/{article_name}', article_name)['response']['article'][0]
        return {
            'id': video['id'],
            'title': video.get('title'),
            'url': video['image'][0],
            'description': video.get('content'),
            'thumbnail': url_or_none(video.get('cover')),
        }
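# --- Illustrative sketch (not part of the diff): DeuxMIE above is a thin
# wrapper over a JSON endpoint - fetch, index into ['response']['News'], map
# fields. A dependency-free version of the same flow (endpoint taken from the
# extractor above; requires network access):
import json
import urllib.request

def fetch_2m_video(video_id):
    with urllib.request.urlopen(
            f'https://2m.ma/api/watchDetail/{video_id}') as resp:
        video = json.load(resp)['response']['News']
    return {
        'id': video_id,
        'title': video.get('titre'),  # field names are French in this API
        'url': video['url'],
        'thumbnail': video.get('image'),
    }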
@@ -1,77 +0,0 @@
import string
import random
import time

from .common import InfoExtractor


class DoodStreamIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm|wf)/[ed]/(?P<id>[a-z0-9]+)'
    _TESTS = [{
        'url': 'http://dood.to/e/5s1wmbdacezb',
        'md5': '4568b83b31e13242b3f1ff96c55f0595',
        'info_dict': {
            'id': '5s1wmbdacezb',
            'ext': 'mp4',
            'title': 'Kat Wonders - Monthly May 2020',
            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
        }
    }, {
        'url': 'http://dood.watch/d/5s1wmbdacezb',
        'md5': '4568b83b31e13242b3f1ff96c55f0595',
        'info_dict': {
            'id': '5s1wmbdacezb',
            'ext': 'mp4',
            'title': 'Kat Wonders - Monthly May 2020',
            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
        }
    }, {
        'url': 'https://dood.to/d/jzrxn12t2s7n',
        'md5': '3207e199426eca7c2aa23c2872e6728a',
        'info_dict': {
            'id': 'jzrxn12t2s7n',
            'ext': 'mp4',
            'title': 'Stacy Cruz Cute ALLWAYSWELL',
            'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
            'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
        }
    }, {
        'url': 'https://dood.so/d/jzrxn12t2s7n',
        'only_matching': True
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        url = f'https://dood.to/e/{video_id}'
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(
            ('og:title', 'twitter:title'), webpage, default=None) or self._html_extract_title(webpage)
        thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
        token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'], webpage, default=None)

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
            'referer': url
        }

        pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
        final_url = ''.join((
            self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
            *(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
            f'?token={token}&expiry={int(time.time() * 1000)}',
        ))

        return {
            'id': video_id,
            'title': title,
            'url': final_url,
            'http_headers': headers,
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumb,
        }
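# --- Illustrative sketch (not part of the diff): the deleted DoodStream
# extractor built its media URL from the /pass_md5 response plus a random
# 10-character suffix and an expiry timestamp. The URL-assembly step in
# isolation (base_url here is a made-up stand-in for the response body):
import random
import string
import time

def build_final_url(base_url, token):
    suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=10))
    return f'{base_url}{suffix}?token={token}&expiry={int(time.time() * 1000)}'

print(build_final_url('https://host.example/dl/', 'abcdef123456'))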
@@ -745,6 +745,45 @@ class MotorTrendIE(DiscoveryPlusBaseIE):
     }


+class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX
+    _TESTS = [{
+        'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
+        'info_dict': {
+            'id': '37699',
+            'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699',
+            'ext': 'mp4',
+            'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback',
+            'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7',
+            'season_number': 5,
+            'episode_number': 52,
+            'episode': 'Episode 52',
+            'season': 'Season 5',
+            'thumbnail': r're:^https?://.+\.jpe?g$',
+            'timestamp': 1388534401,
+            'duration': 1887.345,
+            'creator': 'Originals',
+            'series': 'Roadkill',
+            'upload_date': '20140101',
+            'tags': [],
+        },
+    }]
+
+    _PRODUCT = 'MTOD'
+    _DISCO_API_PARAMS = {
+        'disco_host': 'us1-prod-direct.motortrendondemand.com',
+        'realm': 'motortrend',
+        'country': 'us',
+    }
+
+    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+        headers.update({
+            'x-disco-params': f'realm={realm}',
+            'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1',
+            'Authorization': self._get_auth(disco_base, display_id, realm),
+        })
+
+
 class DiscoveryPlusIE(DiscoveryPlusBaseIE):
     _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
     _TESTS = [{
@@ -1,4 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, get_element_html_by_id
|
||||
|
||||
|
||||
class EpochIE(InfoExtractor):
|
||||
@@ -28,13 +29,21 @@ class EpochIE(InfoExtractor):
|
||||
'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.theepochtimes.com/dick-morris-discusses-his-book-the-return-trumps-big-2024-comeback_4819205.html',
|
||||
'info_dict': {
|
||||
'id': '9489f994-2a20-4812-b233-ac0e5c345632',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dick Morris Discusses His Book ‘The Return: Trump’s Big 2024 Comeback’',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
youmaker_video_id = self._search_regex(r'data-trailer="[\w-]+" data-id="([\w-]+)"', webpage, 'url')
|
||||
youmaker_video_id = extract_attributes(get_element_html_by_id('videobox', webpage))['data-id']
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
|
@@ -71,7 +71,7 @@ class FiveTVIE(InfoExtractor):
             r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
            webpage, 'video url')

-        title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
+        title = self._generic_title('', webpage)
        duration = int_or_none(self._og_search_property(
            'video:duration', webpage, 'duration', default=None))
@@ -12,8 +12,10 @@ from ..utils import (
    int_or_none,
    parse_age_limit,
    parse_duration,
    traverse_obj,
    try_get,
    unified_timestamp,
    url_or_none,
)

@@ -34,7 +36,8 @@ class FOXIE(InfoExtractor):
            'creator': 'FOX',
            'series': 'Gotham',
            'age_limit': 14,
-            'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight'
+            'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
@@ -165,6 +168,7 @@ class FOXIE(InfoExtractor):
            'season_number': int_or_none(video.get('seasonNumber')),
            'episode': video.get('name'),
            'episode_number': int_or_none(video.get('episodeNumber')),
            'thumbnail': traverse_obj(video, ('images', 'still', 'raw'), expected_type=url_or_none),
            'release_year': int_or_none(video.get('releaseYear')),
            'subtitles': subtitles,
        }
@@ -75,6 +75,29 @@ class FoxNewsIE(AMPIE):
        return info


class FoxNewsVideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.foxnews.com/video/6313058664112',
        'info_dict': {
            'id': '6313058664112',
            'ext': 'mp4',
            'thumbnail': r're:https://.+/1280x720/match/image\.jpg',
            'upload_date': '20220930',
            'description': 'New York City, Kids Therapy, Biden',
            'duration': 2415,
            'title': 'Gutfeld! - Thursday, September 29',
            'timestamp': 1664527538,
        },
        'expected_warnings': ['Ignoring subtitle tracks'],
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result(f'https://video.foxnews.com/v/{video_id}', FoxNewsIE, video_id)


class FoxNewsArticleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
    IE_NAME = 'foxnews:article'
@@ -1,5 +1,6 @@
import os
import re
import types
import urllib.parse
import xml.etree.ElementTree

@@ -31,6 +32,7 @@ from ..utils import (
    unified_timestamp,
    unsmuggle_url,
    url_or_none,
    variadic,
    xpath_attr,
    xpath_text,
    xpath_with_ns,
@@ -1979,22 +1981,6 @@ class GenericIE(InfoExtractor):
            },
            'playlist_count': 6,
        },
        {
            # Squarespace video embed, 2019-08-28
            'url': 'http://ootboxford.com',
            'info_dict': {
                'id': 'Tc7b_JGdZfw',
                'title': 'Out of the Blue, at Childish Things 10',
                'ext': 'mp4',
                'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
                'uploader_id': 'helendouglashouse',
                'uploader': 'Helen & Douglas House',
                'upload_date': '20140328',
            },
            'params': {
                'skip_download': True,
            },
        },
        # {
        #     # Zype embed
        #     'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
@@ -2413,40 +2399,6 @@ class GenericIE(InfoExtractor):
                'upload_date': '20210111',
            }
        },
        {
            'note': 'Rumble embed',
            'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
            'md5': '53af34098a7f92c4e51cf0bd1c33f009',
            'info_dict': {
                'id': 'vb0ofn',
                'ext': 'mp4',
                'timestamp': 1612662578,
                'uploader': 'LovingMontana',
                'channel': 'LovingMontana',
                'upload_date': '20210207',
                'title': 'Winter-loving dog helps girls dig a snow fort ',
                'channel_url': 'https://rumble.com/c/c-546523',
                'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
                'duration': 103,
            }
        },
        {
            'note': 'Rumble JS embed',
            'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
            'md5': '4701209ac99095592e73dbba21889690',
            'info_dict': {
                'id': 'v15eqxl',
                'ext': 'mp4',
                'channel': 'Mr Producer Media',
                'duration': 92,
                'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
                'channel_url': 'https://rumble.com/c/RichSementa',
                'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
                'timestamp': 1654892716,
                'uploader': 'Mr Producer Media',
                'upload_date': '20220610',
            }
        },
        {
            'note': 'JSON LD with multiple @type',
            'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html',
@@ -2463,6 +2415,21 @@ class GenericIE(InfoExtractor):
                'duration': 111.0,
            }
        },
        {
            'note': 'JSON LD with unexpected data type',
            'url': 'https://www.autoweek.nl/autotests/artikel/porsche-911-gt3-rs-rij-impressie-2/',
            'info_dict': {
                'id': 'porsche-911-gt3-rs-rij-impressie-2',
                'ext': 'mp4',
                'title': 'Test: Porsche 911 GT3 RS',
                'description': 'Je ziet het niet, maar het is er wel. Downforce, hebben we het dan over. En in de nieuwe Porsche 911 GT3 RS is er zelfs heel veel downforce.',
                'timestamp': 1664920902,
                'upload_date': '20221004',
                'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$',
                'age_limit': 0,
                'direct': True,
            }
        }
    ]

    def report_following_redirect(self, new_url):
@@ -2594,6 +2561,7 @@ class GenericIE(InfoExtractor):
                default_search += ':'
            return self.url_result(default_search + url)

        original_url = url
        url, smuggled_data = unsmuggle_url(url, {})
        force_videoid = None
        is_intentional = smuggled_data.get('to_generic')
@@ -2738,14 +2706,26 @@ class GenericIE(InfoExtractor):
        # Site Name | Video Title
        # Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
-            'title': (self._og_search_title(webpage, default=None)
-                      or self._html_extract_title(webpage, 'video title', default='video')),
+            'title': self._generic_title('', webpage, default='video'),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'age_limit': self._rta_search(webpage),
        })

        domain_name = self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader', default=None)
        self._downloader.write_debug('Looking for embeds')
        embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
        if len(embeds) == 1:
            return {**info_dict, **embeds[0]}
        elif embeds:
            return self.playlist_result(embeds, **info_dict)
        raise UnsupportedError(url)

    def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
        """Returns an iterator of video entries"""
        info_dict = types.MappingProxyType(info_dict)  # Prevents accidental mutation
        video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
        url, smuggled_data = unsmuggle_url(url, {})
        actual_url = urlh.geturl() if urlh else url

        # Sometimes embedded video player is hidden behind percent encoding
        # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
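A side note on the types.MappingProxyType line above: it hands _extract_embeds a read-only view of info_dict, so embed extraction cannot mutate the caller's dict by accident. A plain-Python illustration:

import types

info = {'id': '123', 'title': 'demo'}
frozen = types.MappingProxyType(info)
print(frozen['title'])    # reads pass through: 'demo'
try:
    frozen['title'] = 'x'  # any write raises TypeError
except TypeError as e:
    print(e)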
@@ -2754,38 +2734,20 @@ class GenericIE(InfoExtractor):
        # There probably should be a second run of generic extractor on unescaped webpage.
        # webpage = urllib.parse.unquote(webpage)

        # Unescape squarespace embeds to be detected by generic extractor,
        # see https://github.com/ytdl-org/youtube-dl/issues/21294
        webpage = re.sub(
            r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
            lambda x: unescapeHTML(x.group(0)), webpage)

        # TODO: Move to respective extractors
        self._downloader.write_debug('Looking for Brightcove embeds')
        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
        if bc_urls:
-            entries = [{
-                '_type': 'url',
-                'url': smuggle_url(bc_url, {'Referer': url}),
-                'ie_key': 'BrightcoveLegacy'
-            } for bc_url in bc_urls]
-
-            return {
-                '_type': 'playlist',
-                'title': info_dict['title'],
-                'id': video_id,
-                'entries': entries,
-            }
+            return [self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveLegacyIE)
+                    for bc_url in bc_urls]
        bc_urls = BrightcoveNewIE._extract_brightcove_urls(self, webpage)
        if bc_urls:
-            return self.playlist_from_matches(
-                bc_urls, video_id, info_dict['title'],
-                getter=lambda x: smuggle_url(x, {'referrer': url}),
-                ie='BrightcoveNew')
+            return [self.url_result(smuggle_url(bc_url, {'Referer': url}), BrightcoveNewIE)
+                    for bc_url in bc_urls]

        self._downloader.write_debug('Looking for embeds')
        embeds = []
        for ie in self._downloader._ies.values():
            if ie.ie_key() in smuggled_data.get('block_ies', []):
                continue
            gen = ie.extract_from_webpage(self._downloader, url, webpage)
            current_embeds = []
            try:
@@ -2794,35 +2756,26 @@ class GenericIE(InfoExtractor):
            except self.StopExtraction:
                self.report_detected(f'{ie.IE_NAME} exclusive embed', len(current_embeds),
                                     embeds and 'discarding other embeds')
-                embeds = current_embeds
-                break
+                return current_embeds
            except StopIteration:
                self.report_detected(f'{ie.IE_NAME} embed', len(current_embeds))
                embeds.extend(current_embeds)

        del current_embeds
-        if len(embeds) == 1:
-            return {**info_dict, **embeds[0]}
-        elif embeds:
-            return self.playlist_result(embeds, **info_dict)
+        if embeds:
+            return embeds

        jwplayer_data = self._find_jwplayer_data(
            webpage, video_id, transform_source=js_to_json)
        if jwplayer_data:
            if isinstance(jwplayer_data.get('playlist'), str):
                self.report_detected('JW Player playlist')
-                return {
-                    **info_dict,
-                    '_type': 'url',
-                    'ie_key': 'JWPlatform',
-                    'url': jwplayer_data['playlist'],
-                }
+                return [self.url_result(jwplayer_data['playlist'], 'JWPlatform')]
            try:
                info = self._parse_jwplayer_data(
                    jwplayer_data, video_id, require_title=False, base_url=url)
                if traverse_obj(info, 'formats', ('entries', ..., 'formats')):
                    self.report_detected('JW Player data')
-                    return merge_dicts(info, info_dict)
+                    return [info]
            except ExtractorError:
                # See https://github.com/ytdl-org/youtube-dl/pull/16735
                pass
@@ -2833,11 +2786,8 @@ class GenericIE(InfoExtractor):
            webpage)
        if mobj is not None:
            varname = mobj.group(1)
-            sources = self._parse_json(
-                mobj.group(2), video_id, transform_source=js_to_json,
-                fatal=False) or []
-            if not isinstance(sources, list):
-                sources = [sources]
+            sources = variadic(self._parse_json(
+                mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
            formats = []
            subtitles = {}
            for source in sources:
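The variadic() helper introduced here replaces the old isinstance dance: it passes real sequences through untouched and wraps lone values (including strings and dicts, which are deliberately treated as scalars) in a tuple. Rough behaviour:

from yt_dlp.utils import variadic

print(variadic([{'src': 'a.m3u8'}]))  # [{'src': 'a.m3u8'}] - lists pass through
print(variadic({'src': 'a.m3u8'}))    # ({'src': 'a.m3u8'},) - a single dict gets wrapped
print(variadic('a.m3u8'))             # ('a.m3u8',)          - strings count as scalars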
@@ -2850,7 +2800,7 @@ class GenericIE(InfoExtractor):
                src_type = src_type.lower()
                ext = determine_ext(src).lower()
                if src_type == 'video/youtube':
-                    return self.url_result(src, YoutubeIE.ie_key())
+                    return [self.url_result(src, YoutubeIE.ie_key())]
                if src_type == 'application/dash+xml' or ext == 'mpd':
                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        src, video_id, mpd_id='dash', fatal=False)
@@ -2868,7 +2818,7 @@ class GenericIE(InfoExtractor):
                        'ext': (mimetype2ext(src_type)
                                or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
                        'http_headers': {
-                            'Referer': full_response.geturl(),
+                            'Referer': actual_url,
                        },
                    })
            # https://docs.videojs.com/player#addRemoteTextTrack
@@ -2883,28 +2833,26 @@ class GenericIE(InfoExtractor):
                    'url': urllib.parse.urljoin(url, src),
                    'name': sub.get('label'),
                    'http_headers': {
-                        'Referer': full_response.geturl(),
+                        'Referer': actual_url,
                    },
                })
            if formats or subtitles:
                self.report_detected('video.js embed')
                self._sort_formats(formats)
-                info_dict['formats'] = formats
-                info_dict['subtitles'] = subtitles
-                return info_dict
+                return [{'formats': formats, 'subtitles': subtitles}]

        # Looking for http://schema.org/VideoObject
        json_ld = self._search_json_ld(webpage, video_id, default={})
        if json_ld.get('url') not in (url, None):
            self.report_detected('JSON LD')
-            return merge_dicts({
+            return [merge_dicts({
                '_type': 'video' if json_ld.get('ext') else 'url_transparent',
                'url': smuggle_url(json_ld['url'], {
                    'force_videoid': video_id,
                    'to_generic': True,
                    'http_headers': {'Referer': url},
                }),
-            }, json_ld, info_dict)
+            }, json_ld)]

        def check_video(vurl):
            if YoutubeIE.suitable(vurl):
@@ -2975,13 +2923,13 @@ class GenericIE(InfoExtractor):

            self._sort_formats(formats)

-            return {
+            return [{
                'id': flashvars['video_id'],
                'display_id': display_id,
                'title': title,
                'thumbnail': thumbnail,
                'formats': formats,
-            }
+            }]
        if not found:
            # Broaden the search a little bit
            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
@@ -3035,17 +2983,14 @@ class GenericIE(InfoExtractor):
                webpage)
        if not found:
            # Look also in Refresh HTTP header
-            refresh_header = full_response.headers.get('Refresh')
+            refresh_header = urlh and urlh.headers.get('Refresh')
            if refresh_header:
                found = re.search(REDIRECT_REGEX, refresh_header)
            if found:
                new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
                if new_url != url:
                    self.report_following_redirect(new_url)
-                    return {
-                        '_type': 'url',
-                        'url': new_url,
-                    }
+                    return [self.url_result(new_url)]
                else:
                    found = None

@@ -3056,10 +3001,12 @@ class GenericIE(InfoExtractor):
            embed_url = self._html_search_meta('twitter:player', webpage, default=None)
            if embed_url and embed_url != url:
                self.report_detected('twitter:player iframe')
-                return self.url_result(embed_url)
+                return [self.url_result(embed_url)]

        if not found:
-            raise UnsupportedError(url)
+            return []

        domain_name = self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader', default=None)

        entries = []
        for video_url in orderedSet(found):
@@ -3075,7 +3022,7 @@ class GenericIE(InfoExtractor):

            video_id = os.path.splitext(video_id)[0]
            headers = {
-                'referer': full_response.geturl()
+                'referer': actual_url
            }

            entry_info_dict = {
@@ -3099,7 +3046,7 @@ class GenericIE(InfoExtractor):
            if ext == 'smil':
                entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
            elif ext == 'xspf':
-                return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+                return [self._extract_xspf_playlist(video_url, video_id)]
            elif ext == 'm3u8':
                entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
            elif ext == 'mpd':
@@ -3129,14 +3076,9 @@ class GenericIE(InfoExtractor):

            entries.append(entry_info_dict)

-        if len(entries) == 1:
-            return merge_dicts(entries[0], info_dict)
-        else:
+        if len(entries) > 1:
            for num, e in enumerate(entries, start=1):
                # 'url' results don't have a title
                if e.get('title') is not None:
                    e['title'] = '%s (%d)' % (e['title'], num)
-            return {
-                '_type': 'playlist',
-                'entries': entries,
-            }
+        return entries
@@ -1,5 +1,8 @@
+import re
+import urllib.parse

from .common import InfoExtractor
-from ..utils import make_archive_id
+from ..utils import make_archive_id, unescapeHTML


class HTML5MediaEmbedIE(InfoExtractor):
@@ -17,7 +20,7 @@ class HTML5MediaEmbedIE(InfoExtractor):
    ]

    def _extract_from_webpage(self, url, webpage):
-        video_id, title = self._generic_id(url), self._generic_title(url)
+        video_id, title = self._generic_id(url), self._generic_title(url, webpage)
        entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') or []
        for num, entry in enumerate(entries, start=1):
            entry.update({
@@ -29,3 +32,84 @@ class HTML5MediaEmbedIE(InfoExtractor):
            })
            self._sort_formats(entry['formats'])
            yield entry


class QuotedHTMLIE(InfoExtractor):
    """For common cases of quoted/escaped html parts in the webpage"""
    _VALID_URL = False
    IE_NAME = 'generic:quoted-html'
    IE_DESC = False  # Do not list
    _WEBPAGE_TESTS = [{
        # 2 YouTube embeds in data-html
        'url': 'https://24tv.ua/bronetransporteri-ozbroyenni-zsu-shho-vidomo-pro-bronovik-wolfhound_n2167966',
        'info_dict': {
            'id': 'bronetransporteri-ozbroyenni-zsu-shho-vidomo-pro-bronovik-wolfhound_n2167966',
            'title': 'Броньовик Wolfhound: гігант, який допомагає ЗСУ знищувати окупантів на фронті',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'timestamp': float,
            'upload_date': str,
            'description': 'md5:6816e1e5a65304bd7898e4c7eb1b26f7',
            'age_limit': 0,
        },
        'playlist_count': 2
    }, {
        # Generic iframe embed of TV24UAPlayerIE within data-html
        'url': 'https://24tv.ua/harkivyani-zgaduyut-misto-do-viyni-shhemlive-video_n1887584',
        'info_dict': {
            'id': '1887584',
            'ext': 'mp4',
            'title': 'Харків\'яни згадують місто до війни: щемливе відео',
            'thumbnail': r're:^https?://.*\.jpe?g',
        },
        'params': {'skip_download': True}
    }, {
        # YouTube embeds on Squarespace (data-html): https://github.com/ytdl-org/youtube-dl/issues/21294
        'url': 'https://www.harvardballetcompany.org/past-productions',
        'info_dict': {
            'id': 'past-productions',
            'title': 'Productions — Harvard Ballet Company',
            'age_limit': 0,
            'description': 'Past Productions',
        },
        'playlist_mincount': 26
    }, {
        # Squarespace video embed, 2019-08-28, data-html
        'url': 'http://ootboxford.com',
        'info_dict': {
            'id': 'Tc7b_JGdZfw',
            'title': 'Out of the Blue, at Childish Things 10',
            'ext': 'mp4',
            'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
            'uploader_id': 'helendouglashouse',
            'uploader': 'Helen & Douglas House',
            'upload_date': '20140328',
            'availability': 'public',
            'view_count': int,
            'channel': 'Helen & Douglas House',
            'comment_count': int,
            'uploader_url': 'http://www.youtube.com/user/helendouglashouse',
            'duration': 253,
            'channel_url': 'https://www.youtube.com/channel/UCTChGezrZVmlYlpMlkmulPA',
            'playable_in_embed': True,
            'age_limit': 0,
            'channel_follower_count': int,
            'channel_id': 'UCTChGezrZVmlYlpMlkmulPA',
            'tags': 'count:6',
            'categories': ['Nonprofits & Activism'],
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/Tc7b_JGdZfw/hqdefault.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _extract_from_webpage(self, url, webpage):
        combined = ''
        for _, html in re.findall(r'(?s)\bdata-html=(["\'])((?:(?!\1).)+)\1', webpage):
            # unescapeHTML can handle &quot; etc., unquote can handle percent encoding
            unquoted_html = unescapeHTML(urllib.parse.unquote(html))
            if unquoted_html != html:
                combined += unquoted_html
        if combined:
            yield from self._extract_generic_embeds(url, combined)
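The decoding step in QuotedHTMLIE is worth spelling out: data-html attributes carry embed markup that is both percent-encoded and HTML-entity-escaped, so one unquote + unescape pass recovers markup the generic embed scanner can read. A self-contained sketch using only the stdlib (the payload is made up; yt-dlp's unescapeHTML behaves like html.unescape for this purpose):

import html
import urllib.parse

payload = '%3Ciframe src=&quot;https://example.com/embed&quot;%3E%3C/iframe%3E'
decoded = html.unescape(urllib.parse.unquote(payload))
print(decoded)  # <iframe src="https://example.com/embed"></iframe>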
yt_dlp/extractor/genius.py (new file, 127 lines)
@@ -0,0 +1,127 @@
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    js_to_json,
    smuggle_url,
    str_or_none,
    traverse_obj,
    unescapeHTML,
)


class GeniusIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)'
    _TESTS = [{
        'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
        'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
        'info_dict': {
            'id': '6313303597112',
            'ext': 'mp4',
            'title': 'Vince Staples Breaks Down The Meaning Of “When Sparks Fly”',
            'description': 'md5:bc15e00342c537c0039d414423ae5752',
            'tags': 'count:1',
            'uploader_id': '4863540648001',
            'duration': 388.416,
            'upload_date': '20221005',
            'timestamp': 1664982341,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://genius.com/videos/Breaking-down-drakes-certified-lover-boy-kanye-beef-way-2-sexy-cudi',
        'md5': 'b8ed87a5efd1473bd027c20a969d4060',
        'info_dict': {
            'id': '6271792014001',
            'ext': 'mp4',
            'title': 'md5:c6355f7fa8a70bc86492a3963919fc15',
            'description': 'md5:1774638c31548b31b037c09e9b821393',
            'tags': 'count:3',
            'uploader_id': '4863540648001',
            'duration': 2685.099,
            'upload_date': '20210909',
            'timestamp': 1631209167,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        metadata = self._search_json(
            r'<meta content="', webpage, 'metadata', display_id, transform_source=unescapeHTML)
        video_id = traverse_obj(
            metadata, ('video', 'provider_id'),
            ('dfp_kv', lambda _, x: x['name'] == 'brightcove_video_id', 'values', 0), get_all=False)
        if not video_id:
            raise ExtractorError('Brightcove video id not found in webpage')

        config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
        account_id = config.get('brightcove_account_id', '4863540648001')
        player_id = traverse_obj(
            config, 'brightcove_standard_web_player_id', 'brightcove_standard_no_autoplay_web_player_id',
            'brightcove_modal_web_player_id', 'brightcove_song_story_web_player_id', default='S1ZcmcOC1x')

        return self.url_result(
            smuggle_url(
                f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
                {'referrer': url}), 'BrightcoveNew', video_id)


class GeniusLyricsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics[?/#]?'
    _TESTS = [{
        'url': 'https://genius.com/Lil-baby-heyy-lyrics',
        'playlist_mincount': 2,
        'info_dict': {
            'id': '8454545',
            'title': 'Heyy',
            'description': 'Heyy by Lil Baby',
        },
    }, {
        'url': 'https://genius.com/Outkast-two-dope-boyz-in-a-cadillac-lyrics',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '36239',
            'title': 'Two Dope Boyz (In a Cadillac)',
            'description': 'Two Dope Boyz (In a Cadillac) by OutKast',
        },
    }, {
        'url': 'https://genius.com/Playboi-carti-rip-lyrics',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '3710582',
            'title': 'R.I.P.',
            'description': 'R.I.P. by Playboi Carti',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        json_string = self._search_json(
            r'window\.__PRELOADED_STATE__\s*=\s*JSON\.parse\(', webpage, 'json string',
            display_id, transform_source=js_to_json, contains_pattern=r'\'{(?s:.+)}\'')
        song_info = self._parse_json(json_string, display_id)
        song_id = str_or_none(traverse_obj(song_info, ('songPage', 'song')))
        if not song_id:
            raise ExtractorError('Song id not found in webpage')

        title = traverse_obj(
            song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Title', 'value'),
            get_all=False, default='untitled')
        artist = traverse_obj(
            song_info, ('songPage', 'trackingData', lambda _, x: x['key'] == 'Primary Artist', 'value'),
            get_all=False, default='unknown artist')
        media = traverse_obj(
            song_info, ('entities', 'songs', song_id, 'media'), expected_type=list, default=[])

        entries = []
        for m in media:
            if m.get('type') in ('video', 'audio') and m.get('url'):
                if m.get('provider') == 'spotify':
                    self.to_screen(f'{song_id}: Skipping Spotify audio embed')
                else:
                    entries.append(self.url_result(m['url']))

        return self.playlist_result(entries, song_id, title, f'{title} by {artist}')
@@ -20,7 +20,7 @@ class GlideIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        title = self._html_extract_title(webpage, default=None) or self._og_search_title(webpage)
+        title = self._generic_title('', webpage)
        video_url = self._proto_relative_url(self._search_regex(
            r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
            webpage, 'video URL', default=None,
@@ -1,22 +1,19 @@
import hashlib
import hmac
+import json
import re
import time
import uuid
-import json

from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str
-)
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
-    determine_ext,
    ExtractorError,
+    determine_ext,
    int_or_none,
    join_nonempty,
    str_or_none,
    try_get,
    traverse_obj,
    url_or_none,
)

@@ -26,6 +23,11 @@ class HotStarBaseIE(InfoExtractor):
    _API_URL = 'https://api.hotstar.com'
    _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'

    def _call_api_v1(self, path, *args, **kwargs):
        return self._download_json(
            f'{self._API_URL}/o/v1/{path}', *args, **kwargs,
            headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'})

    def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
        st = int_or_none(st) or int(time.time())
        exp = st + 6000
@@ -59,17 +61,6 @@ class HotStarBaseIE(InfoExtractor):
                response['message'], expected=True)
        return response['data']

    def _call_api(self, path, video_id, query_name='contentId'):
        return self._download_json(
            f'{self._API_URL}/{path}', video_id=video_id,
            query={
                query_name: video_id,
                'tas': 10000,
            }, headers={
                'x-country-code': 'IN',
                'x-platform-code': 'PCTV',
            })

    def _call_api_v2(self, path, video_id, st=None, cookies=None):
        return self._call_api_impl(
            f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={
@@ -79,6 +70,13 @@ class HotStarBaseIE(InfoExtractor):
                'os-version': '10',
            })

    def _playlist_entries(self, path, item_id, root=None, **kwargs):
        results = self._call_api_v1(path, item_id, **kwargs)['body']['results']
        for video in traverse_obj(results, (('assets', None), 'items', ...)):
            if video.get('contentId'):
                yield self.url_result(
                    HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId'])
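The (('assets', None), 'items', ...) path in _playlist_entries lets one call serve both response shapes the API returns — items nested under an assets wrapper, or items at the top level. A standalone sketch with made-up payloads:

from yt_dlp.utils import traverse_obj

wrapped = {'assets': {'items': [{'contentId': '1'}, {'contentId': '2'}]}}
flat = {'items': [{'contentId': '3'}]}

path = (('assets', None), 'items', ...)  # try results['assets']['items'], then results['items']
print(traverse_obj(wrapped, path))  # [{'contentId': '1'}, {'contentId': '2'}]
print(traverse_obj(flat, path))     # [{'contentId': '3'}]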
class HotStarIE(HotStarBaseIE):
    IE_NAME = 'hotstar'
@@ -104,6 +102,7 @@ class HotStarIE(HotStarBaseIE):
            'duration': 381,
            'episode': 'Can You Not Spread Rumours?',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
        'info_dict': {
@@ -161,7 +160,8 @@ class HotStarIE(HotStarBaseIE):
        video_type = self._TYPE.get(video_type, video_type)
        cookies = self._get_cookies(url)  # Cookies before any request

-        video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item']
+        video_data = self._call_api_v1(f'{video_type}/detail', video_id,
+                                       query={'tas': 10000, 'contentId': video_id})['body']['results']['item']
        if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
            self.report_drm(video_id)

@@ -258,16 +258,16 @@ class HotStarPrefixIE(InfoExtractor):
        'url': 'hotstar:1000076273',
        'only_matching': True,
    }, {
-        'url': 'hotstar:movies:1000057157',
+        'url': 'hotstar:movies:1260009879',
        'info_dict': {
-            'id': '1000057157',
+            'id': '1260009879',
            'ext': 'mp4',
-            'title': 'Radha Gopalam',
-            'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22',
-            'timestamp': 1140805800,
-            'upload_date': '20060224',
-            'duration': 9182,
-            'episode': 'Radha Gopalam',
+            'title': 'Nuvvu Naaku Nachav',
+            'description': 'md5:d43701b1314e6f8233ce33523c043b7d',
+            'timestamp': 1567525674,
+            'upload_date': '20190903',
+            'duration': 10787,
+            'episode': 'Nuvvu Naaku Nachav',
        },
    }, {
        'url': 'hotstar:episode:1000234847',
@@ -289,7 +289,7 @@ class HotStarPrefixIE(InfoExtractor):

class HotStarPlaylistIE(HotStarBaseIE):
    IE_NAME = 'hotstar:playlist'
-    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/tv(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
        'info_dict': {
@@ -299,22 +299,49 @@ class HotStarPlaylistIE(HotStarBaseIE):
    }, {
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
        'only_matching': True,
    }, {
        'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272',
        'only_matching': True,
    }]

    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        id_ = self._match_id(url)
+        return self.playlist_result(
+            self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_)

-        collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results']
-        entries = [
-            self.url_result(HotStarIE._video_url(video['contentId']), HotStarIE, video['contentId'])
-            for video in collection['assets']['items'] if video.get('contentId')]
-
-        return self.playlist_result(entries, playlist_id)

class HotStarSeasonIE(HotStarBaseIE):
    IE_NAME = 'hotstar:season'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
        'info_dict': {
            'id': '8028',
        },
        'playlist_mincount': 35,
    }, {
        'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357',
        'info_dict': {
            'id': '4357',
        },
        'playlist_mincount': 30,
    }, {
        'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/',
        'info_dict': {
            'id': '8208',
        },
        'playlist_mincount': 19,
    }]

    def _real_extract(self, url):
        url, season_id = self._match_valid_url(url).groups()
        return self.playlist_result(self._playlist_entries(
            'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id)


class HotStarSeriesIE(HotStarBaseIE):
    IE_NAME = 'hotstar:series'
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
        'info_dict': {
@@ -332,22 +359,13 @@ class HotStarSeriesIE(HotStarBaseIE):
        'info_dict': {
            'id': '435',
        },
-        'playlist_mincount': 269,
+        'playlist_mincount': 267,
    }]

    def _real_extract(self, url):
        url, series_id = self._match_valid_url(url).groups()
-        headers = {
-            'x-country-code': 'IN',
-            'x-platform-code': 'PCTV',
-        }
-        detail_json = self._download_json(
-            f'{self._API_URL}/o/v1/show/detail?contentId={series_id}', series_id, headers=headers)
-        id = try_get(detail_json, lambda x: x['body']['results']['item']['id'], int)
-        item_json = self._download_json(
-            f'{self._API_URL}/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid={id}', series_id, headers=headers)
+        id_ = self._call_api_v1(
+            'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id']

-        return self.playlist_result([
-            self.url_result(HotStarIE._video_url(video['contentId'], root=url), HotStarIE, video['contentId'])
-            for video in item_json['body']['results']['items'] if video.get('contentId')
-        ], series_id)
+        return self.playlist_result(self._playlist_entries(
+            'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id)
@@ -150,7 +150,7 @@ class IPrimaIE(InfoExtractor):
            manifest_url, video_id, mpd_id='dash', fatal=False)
        self._sort_formats(formats)

-        final_result = self._search_json_ld(webpage, video_id) or {}
+        final_result = self._search_json_ld(webpage, video_id, default={})
        final_result.update({
            'id': video_id,
            'title': title,
@@ -588,8 +588,9 @@ class IqIE(InfoExtractor):
            ut_list = ['0']

        # bid 0 as an initial format checker
-        dash_paths = self._parse_json(PhantomJSwrapper(self).get(
-            url, html='<!DOCTYPE html>', video_id=video_id, note2='Executing signature code', jscode=self._DASH_JS % {
+        dash_paths = self._parse_json(PhantomJSwrapper(self, timeout=120_000).get(
+            url, note2='Executing signature code (this may take a couple minutes)',
+            html='<!DOCTYPE html>', video_id=video_id, jscode=self._DASH_JS % {
                'tvid': video_info['tvId'],
                'vid': video_info['vid'],
                'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
yt_dlp/extractor/japandiet.py (new file, 277 lines)
@@ -0,0 +1,277 @@
import re

from ..utils import (
    ExtractorError,
    clean_html,
    int_or_none,
    join_nonempty,
    parse_qs,
    smuggle_url,
    traverse_obj,
    try_call,
    unsmuggle_url
)
from .common import InfoExtractor


def _parse_japanese_date(text):
    if not text:
        return None
    ERA_TABLE = {
        '明治': 1868,
        '大正': 1912,
        '昭和': 1926,
        '平成': 1989,
        '令和': 2019,
    }
    ERA_RE = '|'.join(map(re.escape, ERA_TABLE.keys()))
    mobj = re.search(rf'({ERA_RE})?(\d+)年(\d+)月(\d+)日', re.sub(r'[\s\u3000]+', '', text))
    if not mobj:
        return None
    era, year, month, day = mobj.groups()
    year, month, day = map(int, (year, month, day))
    if era:
        # example input: 令和5年3月34日
        # even though each era has an end, we don't check for it here
        year += ERA_TABLE[era] - 1  # era year 1 maps to the table year itself
    return '%04d%02d%02d' % (year, month, day)
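For reference, the date helper's behaviour on the inputs it is written for (a quick check, not part of the file):

print(_parse_japanese_date('令和4年3月23日'))  # '20220323' (era year 4 -> 2019 + 4 - 1)
print(_parse_japanese_date('2022年10月7日'))   # '20221007' (plain Gregorian passes through)
print(_parse_japanese_date('no date here'))    # None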
def _parse_japanese_duration(text):
    mobj = re.search(r'(?:(\d+)日間?)?(?:(\d+)時間?)?(?:(\d+)分)?(?:(\d+)秒)?', re.sub(r'[\s\u3000]+', '', text or ''))
    if not mobj:
        return
    days, hours, mins, secs = [int_or_none(x, default=0) for x in mobj.groups()]
    return secs + mins * 60 + hours * 60 * 60 + days * 24 * 60 * 60
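And its duration counterpart, which accepts any subset of the 日/時間/分/秒 components:

print(_parse_japanese_duration('1時間30分'))  # 5400
print(_parse_japanese_duration('45分'))       # 2700
print(_parse_japanese_duration('2日3秒'))     # 172803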
class ShugiinItvBaseIE(InfoExtractor):
    _INDEX_ROOMS = None

    @classmethod
    def _find_rooms(cls, webpage):
        return [{
            '_type': 'url',
            'id': x.group(1),
            'title': clean_html(x.group(2)).strip(),
            'url': smuggle_url(f'https://www.shugiintv.go.jp/jp/index.php?room_id={x.group(1)}', {'g': x.groups()}),
            'ie_key': ShugiinItvLiveIE.ie_key(),
        } for x in re.finditer(r'(?s)<a\s+href="[^"]+\?room_id=(room\d+)"\s*class="play_live".+?class="s12_14">(.+?)</td>', webpage)]

    def _fetch_rooms(self):
        if not self._INDEX_ROOMS:
            webpage = self._download_webpage(
                'https://www.shugiintv.go.jp/jp/index.php', None,
                encoding='euc-jp', note='Downloading proceedings info')
            ShugiinItvBaseIE._INDEX_ROOMS = self._find_rooms(webpage)
        return self._INDEX_ROOMS


class ShugiinItvLiveIE(ShugiinItvBaseIE):
    _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)(?:/index\.php)?$'
    IE_DESC = '衆議院インターネット審議中継'

    _TESTS = [{
        'url': 'https://www.shugiintv.go.jp/jp/index.php',
        'info_dict': {
            '_type': 'playlist',
            'title': 'All proceedings for today',
        },
        # expect at least one proceeding to be running
        'playlist_mincount': 1,
    }]

    @classmethod
    def suitable(cls, url):
        return super().suitable(url) and not any(x.suitable(url) for x in (ShugiinItvLiveRoomIE, ShugiinItvVodIE))

    def _real_extract(self, url):
        self.to_screen(
            'Downloading all running proceedings. To specify one proceeding, use a direct link from the website')
        return self.playlist_result(self._fetch_rooms(), playlist_title='All proceedings for today')


class ShugiinItvLiveRoomIE(ShugiinItvBaseIE):
    _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?room_id=(?P<id>room\d+)'
    IE_DESC = '衆議院インターネット審議中継 (中継)'

    _TESTS = [{
        'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room01',
        'info_dict': {
            'id': 'room01',
            'title': '内閣委員会',
        },
        'skip': 'this runs for a time and not every day',
    }, {
        'url': 'https://www.shugiintv.go.jp/jp/index.php?room_id=room11',
        'info_dict': {
            'id': 'room11',
            'title': '外務委員会',
        },
        'skip': 'this runs for a time and not every day',
    }]

    def _real_extract(self, url):
        url, smug = unsmuggle_url(url, default={})
        if smug.get('g'):
            room_id, title = smug['g']
        else:
            room_id = self._match_id(url)
            title = traverse_obj(self._fetch_rooms(), (lambda k, v: v['id'] == room_id, 'title'), get_all=False)

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://hlslive.shugiintv.go.jp/{room_id}/amlst:{room_id}/playlist.m3u8',
            room_id, ext='mp4')
        self._sort_formats(formats)

        return {
            'id': room_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
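Note how _find_rooms smuggles the scraped (room_id, title) pair into each room URL, so ShugiinItvLiveRoomIE can avoid re-downloading the index page. smuggle_url/unsmuggle_url simply round-trip JSON through the URL fragment, roughly:

from yt_dlp.utils import smuggle_url, unsmuggle_url

url = smuggle_url('https://www.shugiintv.go.jp/jp/index.php?room_id=room01',
                  {'g': ('room01', '内閣委員会')})
plain, data = unsmuggle_url(url, default={})
print(plain)  # the original URL, fragment removed
print(data)   # {'g': ['room01', '内閣委員会']} - tuples come back as lists (JSON round-trip)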
class ShugiinItvVodIE(ShugiinItvBaseIE):
    _VALID_URL = r'https?://(?:www\.)?shugiintv\.go\.jp/(?:jp|en)/index\.php\?ex=VL(?:\&[^=]+=[^&]*)*\&deli_id=(?P<id>\d+)'
    IE_DESC = '衆議院インターネット審議中継 (ビデオライブラリ)'
    _TESTS = [{
        'url': 'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id=53846',
        'info_dict': {
            'id': '53846',
            'title': 'ウクライナ大統領国会演説(オンライン)',
            'release_date': '20220323',
            'chapters': 'count:4',
        }
    }, {
        'url': 'https://www.shugiintv.go.jp/en/index.php?ex=VL&media_type=&deli_id=53846',
        'only_matching': True
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            f'https://www.shugiintv.go.jp/jp/index.php?ex=VL&media_type=&deli_id={video_id}', video_id,
            encoding='euc-jp')

        m3u8_url = self._search_regex(
            r'id="vtag_src_base_vod"\s*value="(http.+?\.m3u8)"', webpage, 'm3u8 url')
        m3u8_url = re.sub(r'^http://', 'https://', m3u8_url)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, video_id, ext='mp4')
        self._sort_formats(formats)

        title = self._html_search_regex(
            (r'<td\s+align="left">(.+)\s*\(\d+分\)',
             r'<TD.+?<IMG\s*src=".+?/spacer\.gif".+?height="15">(.+?)<IMG'), webpage, 'title', fatal=False)

        release_date = _parse_japanese_date(self._html_search_regex(
            r'開会日</td>\s*<td.+?/td>\s*<TD>(.+?)</TD>',
            webpage, 'release date', fatal=False))

        chapters = []
        for chp in re.finditer(r'(?i)<A\s+HREF="([^"]+?)"\s*class="play_vod">(?!<img)(.+)</[Aa]>', webpage):
            chapters.append({
                'title': clean_html(chp.group(2)).strip(),
                'start_time': try_call(lambda: float(parse_qs(chp.group(1))['time'][0].strip())),
            })
        # NOTE: there are blanks at the start and the end of the videos,
        # so getting/providing the video duration is not possible
        # also, the exact end_time for the last chapter is unknown (we can get at most minutes of granularity)
        last_tr = re.findall(r'(?s)<TR\s*class="s14_24">(.+?)</TR>', webpage)[-1]
        if last_tr and chapters:
            last_td = re.findall(r'<TD.+?</TD>', last_tr)[-1]
            if last_td:
                chapters[-1]['end_time'] = chapters[-1]['start_time'] + _parse_japanese_duration(clean_html(last_td))

        return {
            'id': video_id,
            'title': title,
            'release_date': release_date,
            'chapters': chapters,
            'formats': formats,
            'subtitles': subtitles,
        }


class SangiinInstructionIE(InfoExtractor):
    _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
    IE_DESC = False  # this shouldn't be listed as a supported site

    def _real_extract(self, url):
        raise ExtractorError('Copy the link from the button below the video description or player, and use that link to download. If there is no button in the frame, get the URL of the frame showing the video.', expected=True)


class SangiinIE(InfoExtractor):
    _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/detail\.php\?sid=(?P<id>\d+)'
    IE_DESC = '参議院インターネット審議中継 (archive)'

    _TESTS = [{
        'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7052',
        'info_dict': {
            'id': '7052',
            'title': '2022年10月7日 本会議',
            'description': 'md5:0a5fed523f95c88105a0b0bf1dd71489',
            'upload_date': '20221007',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7037',
        'info_dict': {
            'id': '7037',
            'title': '2022年10月3日 開会式',
            'upload_date': '20221003',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.webtv.sangiin.go.jp/webtv/detail.php?sid=7076',
        'info_dict': {
            'id': '7076',
            'title': '2022年10月27日 法務委員会',
            'upload_date': '20221027',
            'ext': 'mp4',
            'is_live': True,
        },
        'skip': 'this live is turned into archive after it ends',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        date = self._html_search_regex(
            r'<dt[^>]*>\s*開会日\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
            'date', fatal=False)
        upload_date = _parse_japanese_date(date)

        title = self._html_search_regex(
            r'<dt[^>]*>\s*会議名\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
            'title', fatal=False)

        # some videos don't have the elements, so assume it's missing
        description = self._html_search_regex(
            r'会議の経過\s*</h3>\s*<span[^>]*>(.+?)</span>', webpage,
            'description', default=None)

        # this row appears only when it's a livestream
        is_live = bool(self._html_search_regex(
            r'<dt[^>]*>\s*公報掲載時刻\s*</dt>\s*<dd[^>]*>\s*(.+?)\s*</dd>', webpage,
            'is_live', default=None))

        m3u8_url = self._search_regex(
            r'var\s+videopath\s*=\s*(["\'])([^"\']+)\1', webpage,
            'm3u8 url', group=2)

        formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': join_nonempty(date, title, delim=' '),
            'description': description,
            'upload_date': upload_date,
            'formats': formats,
            'subtitles': subs,
            'is_live': is_live,
        }

@@ -15,13 +15,14 @@ from ..utils import (
    unsmuggle_url,
    smuggle_url,
    traverse_obj,
    remove_start
)


class KalturaIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                (?:
-                    kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
+                    kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
                    https?://
                        (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
                        (?:
@@ -56,6 +57,7 @@ class KalturaIE(InfoExtractor):
            'thumbnail': 're:^https?://.*/thumbnail/.*',
            'timestamp': int,
        },
        'skip': 'The access to this service is forbidden since the specified partner is blocked'
    },
    {
        'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
@@ -108,6 +110,80 @@ class KalturaIE(InfoExtractor):
        # unavailable source format
        'url': 'kaltura:513551:1_66x4rg7o',
        'only_matching': True,
    },
    {
        # html5lib URL using kwidget player
        'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.46/mwEmbedFrame.php/p/691292/uiconf_id/20499062/entry_id/0_c076mna6?wid=_691292&iframeembed=true&playerId=kaltura_player_1420508608&entry_id=0_c076mna6&flashvars%5BakamaiHD.loadingPolicy%5D=preInitialize&flashvars%5BakamaiHD.asyncInit%5D=true&flashvars%5BstreamerType%5D=hdnetwork',
        'info_dict': {
            'id': '0_c076mna6',
            'ext': 'mp4',
            'title': 'md5:4883e7acbcbf42583a2dddc97dee4855',
            'duration': 3608,
            'uploader_id': 'commons@swinburne.edu.au',
            'timestamp': 1408086874,
            'view_count': int,
            'upload_date': '20140815',
            'thumbnail': 'http://cfvod.kaltura.com/p/691292/sp/69129200/thumbnail/entry_id/0_c076mna6/version/100022',
        }
    },
    {
        # html5lib playlist URL using kwidget player
        'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.89/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_4j3m32cv&iframeembed=true&playerId=kaltura_player_&flashvars[playlistAPI.kpl0Id]=1_jovey5nu&flashvars[ks]=&&flashvars[imageDefaultDuration]=30&flashvars[localizationCode]=en&flashvars[leadWithHTML5]=true&flashvars[forceMobileHTML5]=true&flashvars[nextPrevBtn.plugin]=true&flashvars[hotspots.plugin]=true&flashvars[sideBarContainer.plugin]=true&flashvars[sideBarContainer.position]=left&flashvars[sideBarContainer.clickToClose]=true&flashvars[chapters.plugin]=true&flashvars[chapters.layout]=vertical&flashvars[chapters.thumbnailRotator]=false&flashvars[streamSelector.plugin]=true&flashvars[EmbedPlayer.SpinnerTarget]=videoHolder&flashvars[dualScreen.plugin]=true&flashvars[playlistAPI.playlistUrl]=https://canvasgatechtest.kaf.kaltura.com/playlist/details/{playlistAPI.kpl0Id}/categoryid/126428551',
        'info_dict': {
            'id': '1_jovey5nu',
            'title': '00-00 Introduction'
        },
        'playlist': [
            {
                'info_dict': {
                    'id': '1_b1y5hlvx',
                    'ext': 'mp4',
                    'title': 'CS7646_00-00 Introductio_Introduction',
                    'duration': 91,
                    'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_b1y5hlvx/version/100001',
                    'view_count': int,
                    'timestamp': 1533154447,
                    'upload_date': '20180801',
                    'uploader_id': 'djoyner3',
                }
            }, {
                'info_dict': {
                    'id': '1_jfb7mdpn',
                    'ext': 'mp4',
                    'title': 'CS7646_00-00 Introductio_Three parts to the course',
                    'duration': 63,
                    'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_jfb7mdpn/version/100001',
                    'view_count': int,
                    'timestamp': 1533154489,
                    'upload_date': '20180801',
                    'uploader_id': 'djoyner3',
                }
            }, {
                'info_dict': {
                    'id': '1_8xflxdp7',
                    'ext': 'mp4',
                    'title': 'CS7646_00-00 Introductio_Textbooks',
                    'duration': 37,
                    'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_8xflxdp7/version/100001',
                    'view_count': int,
                    'timestamp': 1533154512,
                    'upload_date': '20180801',
                    'uploader_id': 'djoyner3',
                }
            }, {
                'info_dict': {
                    'id': '1_3hqew8kn',
                    'ext': 'mp4',
                    'title': 'CS7646_00-00 Introductio_Prerequisites',
                    'duration': 49,
                    'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_3hqew8kn/version/100001',
                    'view_count': int,
                    'timestamp': 1533154536,
                    'upload_date': '20180801',
                    'uploader_id': 'djoyner3',
                }
            }
        ]
    }
]

@@ -187,7 +263,14 @@ class KalturaIE(InfoExtractor):

        return data

-    def _get_video_info(self, video_id, partner_id, service_url=None):
+    def _get_video_info(self, video_id, partner_id, service_url=None, player_type='html5'):
+        assert player_type in ('html5', 'kwidget')
+        if player_type == 'kwidget':
+            return self._get_video_info_kwidget(video_id, partner_id, service_url)
+
+        return self._get_video_info_html5(video_id, partner_id, service_url)
+
+    def _get_video_info_html5(self, video_id, partner_id, service_url=None):
        actions = [
            {
                'apiVersion': '3.3.0',
@@ -200,8 +283,9 @@ class KalturaIE(InfoExtractor):
                'expiry': 86400,
                'service': 'session',
                'action': 'startWidgetSession',
-                'widgetId': '_%s' % partner_id,
+                'widgetId': self._build_widget_id(partner_id),
            },
            # info
            {
                'action': 'list',
                'filter': {'redirectFromEntryId': video_id},
@@ -212,12 +296,14 @@ class KalturaIE(InfoExtractor):
                    'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
                },
            },
            # flavor_assets
            {
                'action': 'getbyentryid',
                'entryId': video_id,
                'service': 'flavorAsset',
                'ks': '{1:result:ks}',
            },
            # captions
            {
                'action': 'list',
                'filter:entryIdEqual': video_id,
@@ -226,17 +312,85 @@ class KalturaIE(InfoExtractor):
            },
        ]
        return self._kaltura_api_call(
-            video_id, actions, service_url, note='Downloading video info JSON')
+            video_id, actions, service_url, note='Downloading video info JSON (Kaltura html5 player)')

    def _get_video_info_kwidget(self, video_id, partner_id, service_url=None):
        actions = [
            {
                'service': 'multirequest',
                'apiVersion': '3.1',
                'expiry': 86400,
                'clientTag': 'kwidget:v2.89',
                'format': 1,  # JSON, 2 = XML, 3 = PHP
                'ignoreNull': 1,
                'action': 'null',
            },
            # header
            {
                'expiry': 86400,
                'service': 'session',
                'action': 'startWidgetSession',
                'widgetId': self._build_widget_id(partner_id),
            },
            # (empty)
            {
                'expiry': 86400,
                'service': 'session',
                'action': 'startwidgetsession',
                'widgetId': self._build_widget_id(partner_id),
                'format': 9,
                'apiVersion': '3.1',
                'clientTag': 'kwidget:v2.89',
                'ignoreNull': 1,
                'ks': '{1:result:ks}'
            },
            # info
            {
                'action': 'list',
                'filter': {'redirectFromEntryId': video_id},
                'service': 'baseentry',
                'ks': '{1:result:ks}',
                'responseProfile': {
                    'type': 1,
                    'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
                },
            },
            # flavor_assets
            {
                'action': 'getbyentryid',
                'entryId': video_id,
                'service': 'flavorAsset',
                'ks': '{1:result:ks}',
            },
            # captions
            {
                'action': 'list',
                'filter:entryIdEqual': video_id,
                'service': 'caption_captionasset',
                'ks': '{1:result:ks}',
            },
        ]
        # second object (representing the second start widget session) is None
        header, _, _info, flavor_assets, captions = self._kaltura_api_call(
            video_id, actions, service_url, note='Downloading video info JSON (Kaltura kwidget player)')
        info = _info['objects'][0]
        return header, info, flavor_assets, captions

    def _build_widget_id(self, partner_id):
        return partner_id if '_' in partner_id else f'_{partner_id}'
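About the '{1:result:ks}' strings in both action lists: Kaltura's multirequest API substitutes {N:result:field} tokens with values taken from the N-th response of the same batch, which is how the session secret from the first startWidgetSession call feeds every later call in one round trip. Conceptually (an illustrative re-implementation, not the extractor's code):

import re

responses = [{'ks': 'djJ8MTIz...'}]  # response to request 1 (startWidgetSession)

def resolve(token):
    # expand '{1:result:ks}' into responses[0]['ks'], as the Kaltura backend does
    mobj = re.fullmatch(r'\{(\d+):result:(\w+)\}', token)
    return responses[int(mobj.group(1)) - 1][mobj.group(2)] if mobj else token

print(resolve('{1:result:ks}'))  # 'djJ8MTIz...'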
    IFRAME_PACKAGE_DATA_REGEX = r'window\.kalturaIframePackageData\s*='

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = self._match_valid_url(url)
-        partner_id, entry_id = mobj.group('partner_id', 'id')
-        ks = None
-        captions = None
+        partner_id, entry_id, player_type = mobj.group('partner_id', 'id', 'player_type')
+        ks, captions = None, None
        if not player_type:
            player_type = 'kwidget' if 'html5lib/v2' in url else 'html5'
        if partner_id and entry_id:
-            _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
+            _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'), player_type=player_type)
        else:
            path, query = mobj.group('path', 'query')
            if not path and not query:
@@ -248,7 +402,7 @@ class KalturaIE(InfoExtractor):
                splitted_path = path.split('/')
                params.update(dict((zip(splitted_path[::2], [[v] for v in splitted_path[1::2]]))))
            if 'wid' in params:
-                partner_id = params['wid'][0][1:]
+                partner_id = remove_start(params['wid'][0], '_')
            elif 'p' in params:
                partner_id = params['p'][0]
            elif 'partner_id' in params:
@@ -257,14 +411,13 @@ class KalturaIE(InfoExtractor):
                raise ExtractorError('Invalid URL', expected=True)
            if 'entry_id' in params:
                entry_id = params['entry_id'][0]
-                _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id)
+                _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, player_type=player_type)
            elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
                reference_id = params['flashvars[referenceId]'][0]
                webpage = self._download_webpage(url, reference_id)
-                entry_data = self._parse_json(self._search_regex(
-                    r'window\.kalturaIframePackageData\s*=\s*({.*});',
-                    webpage, 'kalturaIframePackageData'),
-                    reference_id)['entryResult']
+                entry_data = self._search_json(
+                    self.IFRAME_PACKAGE_DATA_REGEX, webpage,
+                    'kalturaIframePackageData', reference_id)['entryResult']
                info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
                entry_id = info['id']
                # Unfortunately, data returned in kalturaIframePackageData lacks
@@ -272,16 +425,29 @@ class KalturaIE(InfoExtractor):
                # regular approach since we now know the entry_id
                try:
                    _, info, flavor_assets, captions = self._get_video_info(
-                        entry_id, partner_id)
+                        entry_id, partner_id, player_type=player_type)
                except ExtractorError:
                    # Regular scenario failed but we already have everything
                    # extracted apart from captions and can process at least
                    # with this
                    pass
            elif 'uiconf_id' in params and 'flashvars[playlistAPI.kpl0Id]' in params:
                playlist_id = params['flashvars[playlistAPI.kpl0Id]'][0]
                webpage = self._download_webpage(url, playlist_id)
                playlist_data = self._search_json(
                    self.IFRAME_PACKAGE_DATA_REGEX, webpage,
                    'kalturaIframePackageData', playlist_id)['playlistResult']
                return self.playlist_from_matches(
                    traverse_obj(playlist_data, (playlist_id, 'items', ..., 'id')),
                    playlist_id, traverse_obj(playlist_data, (playlist_id, 'name')),
                    ie=KalturaIE, getter=lambda x: f'kaltura:{partner_id}:{x}:{player_type}')
            else:
                raise ExtractorError('Invalid URL', expected=True)
            ks = params.get('flashvars[ks]', [None])[0]

        return self._per_video_extract(smuggled_data, entry_id, info, ks, flavor_assets, captions)

    def _per_video_extract(self, smuggled_data, entry_id, info, ks, flavor_assets, captions):
        source_url = smuggled_data.get('source_url')
        if source_url:
            referrer = base64.b64encode(
|
||||
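The '{1:result:ks}' placeholders in the action lists above are Kaltura multirequest back-references: every action after the first becomes a numbered sub-request in one HTTP call, and '{1:result:ks}' splices the ks session token from sub-request 1's response into the later ones, so a single round trip can open a widget session and then fetch the entry, flavor assets and captions with it. A minimal sketch of how such an action list collapses into one request; the endpoint path and the flattening helper are assumptions for illustration, not yt-dlp's exact _kaltura_api_call:

import json
import urllib.request

def kaltura_multirequest(service_url, actions):
    # actions[0] carries the shared parameters; the remaining actions become
    # numbered sub-requests (1:, 2:, ...), which is what '{1:result:ks}'
    # refers back to on the server side
    params = dict(actions[0])
    for i, action in enumerate(actions[1:], start=1):
        params.update({f'{i}:{key}': value for key, value in action.items()})
    request = urllib.request.Request(
        f'{service_url}/api_v3/service/multirequest',
        data=json.dumps(params).encode(),
        headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(request) as response:
        return json.load(response)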
@@ -24,10 +24,14 @@ class LBRYBaseIE(InfoExtractor):
_SUPPORTED_STREAM_TYPES = ['video', 'audio']

def _call_api_proxy(self, method, display_id, params, resource):
headers = {'Content-Type': 'application/json-rpc'}
token = try_get(self._get_cookies('https://odysee.com'), lambda x: x['auth_token'].value)
if token:
headers['x-lbry-auth-token'] = token
response = self._download_json(
'https://api.lbry.tv/api/v1/proxy',
display_id, 'Downloading %s JSON metadata' % resource,
headers={'Content-Type': 'application/json-rpc'},
headers=headers,
data=json.dumps({
'method': method,
'params': params,
@@ -159,6 +163,29 @@ class LBRYIE(LBRYBaseIE):
'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE',
'license': 'Copyrighted (contact publisher)',
}
}, {
# HLS live stream (might expire)
'url': 'https://odysee.com/@RT:fd/livestream_RT:d',
'info_dict': {
'id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
'ext': 'mp4',
'live_status': 'is_live',
'title': 'startswith:RT News | Livestream 24/7',
'description': 'md5:fe68d0056dfe79c1a6b8ce8c34d5f6fa',
'timestamp': int,
'upload_date': str,
'release_timestamp': int,
'release_date': str,
'tags': list,
'duration': None,
'channel': 'RT',
'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
'formats': 'mincount:1',
'thumbnail': 'startswith:https://thumb',
'license': 'None',
},
'params': {'skip_download': True}
}, {
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
'only_matching': True,
@@ -197,22 +224,24 @@ class LBRYIE(LBRYBaseIE):
display_id = compat_urllib_parse_unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
headers = {'Referer': 'https://odysee.com/'}
if result['value'].get('stream_type') in self._SUPPORTED_STREAM_TYPES:
claim_id, is_live, headers = result['claim_id'], False, {}
claim_id, is_live = result['claim_id'], False
streaming_url = self._call_api_proxy(
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
final_url = self._request_webpage(
HEADRequest(streaming_url), display_id,
HEADRequest(streaming_url), display_id, headers=headers,
note='Downloading streaming redirect url info').geturl()
elif result.get('value_type') == 'stream':
claim_id, is_live = result['signing_channel']['claim_id'], True
headers = {'referer': 'https://player.odysee.live/'}
live_data = self._download_json(
'https://api.odysee.live/livestream/is_live', claim_id,
query={'channel_claim_id': claim_id},
note='Downloading livestream JSON metadata')['data']
streaming_url = final_url = live_data.get('VideoURL')
if not final_url and not live_data.get('Live'):
# Upcoming videos may still give VideoURL
if not live_data.get('Live'):
streaming_url = final_url = None
self.raise_no_formats('This stream is not live', True, claim_id)
else:
raise UnsupportedError(url)

86
yt_dlp/extractor/listennotes.py
Normal file
@@ -0,0 +1,86 @@
import re

from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
get_element_text_and_html_by_tag,
parse_duration,
strip_or_none,
traverse_obj,
try_call,
)


class ListenNotesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?listennotes\.com/podcasts/[^/]+/[^/]+-(?P<id>.+)/'
_TESTS = [{
'url': 'https://www.listennotes.com/podcasts/thriving-on-overload/tim-oreilly-on-noticing-KrDgvNb_u1n/',
'md5': '5b91a32f841e5788fb82b72a1a8af7f7',
'info_dict': {
'id': 'KrDgvNb_u1n',
'ext': 'mp3',
'title': 'md5:32236591a921adf17bbdbf0441b6c0e9',
'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd',
'duration': 2148.0,
'channel': 'Thriving on Overload',
'channel_id': 'ed84wITivxF',
'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg',
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/',
'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
}
}, {
'url': 'https://www.listennotes.com/podcasts/ask-noah-show/episode-177-wireguard-with-lwEA3154JzG/',
'md5': '62fb4ffe7fc525632a1138bf72a5ce53',
'info_dict': {
'id': 'lwEA3154JzG',
'ext': 'mp3',
'title': 'Episode 177: WireGuard with Jason Donenfeld',
'description': 'md5:24744f36456a3e95f83c1193a3458594',
'duration': 3861.0,
'channel': 'Ask Noah Show',
'channel_id': '4DQTzdS5-j7',
'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg',
'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
}
}]

def _clean_description(self, description):
return clean_html(re.sub(r'(</?(div|p)>\s*)+', '<br/><br/>', description or ''))

def _real_extract(self, url):
audio_id = self._match_id(url)
webpage = self._download_webpage(url, audio_id)
data = self._search_json(
r'<script id="original-content"[^>]+\btype="application/json">', webpage, 'content', audio_id)
data.update(extract_attributes(get_element_html_by_id(
r'episode-play-button-toolbar|episode-no-play-button-toolbar', webpage, escape_value=False)))

duration, description = self._search_regex(
r'(?P<duration>[\d:]+)\s*-\s*(?P<description>.+)',
self._html_search_meta(['og:description', 'description', 'twitter:description'], webpage),
'description', fatal=False, group=('duration', 'description')) or (None, None)

return {
'id': audio_id,
'url': data['audio'],
'title': (data.get('data-title')
or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0])
or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
or strip_or_none(description)),
'duration': parse_duration(traverse_obj(data, 'audio_length', 'data-duration') or duration),
'episode_id': traverse_obj(data, 'uuid', 'data-episode-uuid'),
**traverse_obj(data, {
'thumbnail': 'data-image',
'channel': 'data-channel-title',
'cast': ('nlp_entities', ..., 'name'),
'channel_url': 'channel_url',
'channel_id': 'channel_short_uuid',
})
}
@@ -3,7 +3,7 @@ from ..utils import format_field, traverse_obj, unified_timestamp


class LivestreamfailsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?livestreamfails\.com/clip/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?livestreamfails\.com/(?:clip|post)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://livestreamfails.com/clip/139200',
'md5': '8a03aea1a46e94a05af6410337463102',
@@ -17,6 +17,9 @@ class LivestreamfailsIE(InfoExtractor):
'timestamp': 1656271785,
'upload_date': '20220626',
}
}, {
'url': 'https://livestreamfails.com/post/139200',
'only_matching': True,
}]

def _real_extract(self, url):

@@ -1,8 +1,12 @@
import re

from .common import InfoExtractor
from ..utils import (
determine_ext,
extract_attributes,
int_or_none,
str_to_int,
url_or_none,
urlencode_postdata,
)

@@ -17,17 +21,20 @@ class ManyVidsIE(InfoExtractor):
'id': '133957',
'ext': 'mp4',
'title': 'everthing about me (Preview)',
'uploader': 'ellyxxix',
'view_count': int,
'like_count': int,
},
}, {
# full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
'info_dict': {
'id': '935718',
'ext': 'mp4',
'title': 'MY FACE REVEAL',
'description': 'md5:ec5901d41808b3746fed90face161612',
'uploader': 'Sarah Calanthe',
'view_count': int,
'like_count': int,
},
@@ -36,17 +43,50 @@ class ManyVidsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)
real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
try:
webpage = self._download_webpage(real_url, video_id)
except Exception:
# probably useless fallback
webpage = self._download_webpage(url, video_id)

video_url = self._search_regex(
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'video URL', group='url')
info = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'meta details', default='')
info = extract_attributes(info)

title = self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None) or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True)
player = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
webpage, 'player details', default='')
player = extract_attributes(player)

video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)

def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, str) else default

uploader = txt_or_none(info.get('data-meta-author'))

def mung_title(s):
if uploader:
s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
return txt_or_none(s)

title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))

title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title

if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
@@ -59,7 +99,8 @@ class ManyVidsIE(InfoExtractor):
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, fatal=False, data=urlencode_postdata({
video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
@@ -67,24 +108,56 @@ class ManyVidsIE(InfoExtractor):
'X-Requested-With': 'XMLHttpRequest'
})

if determine_ext(video_url) == 'm3u8':
formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
formats = [{'url': video_url}]
formats = []
for v_url, fmt in video_urls_and_ids:
v_url = url_or_none(v_url)
if not v_url:
continue
if determine_ext(v_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
else:
formats.append({
'url': v_url,
'format_id': fmt,
})

like_count = int_or_none(self._search_regex(
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
view_count = str_to_int(self._html_search_regex(
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
'view count', default=None))
self._remove_duplicate_formats(formats)

for f in formats:
if f.get('height') is None:
f['height'] = int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
if '/preview/' in f['url']:
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1

self._sort_formats(formats)

def get_likes():
likes = self._search_regex(
r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))

def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))

return {
'id': video_id,
'title': title,
'view_count': view_count,
'like_count': like_count,
'formats': formats,
'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'),
'description': txt_or_none(info.get('data-meta-description')),
'uploader': txt_or_none(info.get('data-meta-author')),
'thumbnail': (
url_or_none(info.get('data-meta-image'))
or url_or_none(player.get('data-video-screenshot'))),
'view_count': get_views(),
'like_count': get_likes(),
}

@@ -48,9 +48,7 @@ class MeipaiIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

title = self._og_search_title(
webpage, default=None) or self._html_search_regex(
r'<title[^>]*>([^<]+)</title>', webpage, 'title')
title = self._generic_title('', webpage)

formats = []


@@ -348,3 +348,36 @@ class MLBTVIE(InfoExtractor):
'subtitles': subtitles,
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
}


class MLBArticleIE(InfoExtractor):
_VALID_URL = r'https?://www\.mlb\.com/news/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.mlb.com/news/manny-machado-robs-guillermo-heredia-reacts',
'info_dict': {
'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
'title': 'Machado\'s grab draws hilarious irate reaction',
'modified_timestamp': 1650130737,
'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
'modified_date': '20220416',
},
'playlist_count': 2,
}]

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']

content_data_id = traverse_obj(
apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)

content_real_info = apollo_cache_json[content_data_id]

return self.playlist_from_matches(
traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
title=self._html_search_meta('og:title', webpage),
description=content_real_info.get('summary'),
modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))

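The lambda k, _: k.startswith('getForgeContent') above relies on traverse_obj accepting a callable as a path segment: the function is called with each (key, value) pair of the mapping and the traversal branches into every match. A small self-contained illustration of that behaviour, assuming yt-dlp's utils API at this revision:

from yt_dlp.utils import traverse_obj

apollo_cache = {'ROOT_QUERY': {
    'getForgeContent(slug:"x")': {'id': 'abc-123'},
    'somethingElse': {},
}}
# branch into every key matching the predicate, then take its 'id';
# get_all=False returns the first hit instead of a list of all hits
content_id = traverse_obj(
    apollo_cache,
    ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'),
    get_all=False)
assert content_id == 'abc-123'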
@@ -69,7 +69,7 @@ class MotherlessIE(InfoExtractor):
'title': 'a/ Hot Teens',
'categories': list,
'upload_date': '20210104',
'uploader_id': 'yonbiw',
'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
@@ -123,11 +123,12 @@ class MotherlessIE(InfoExtractor):
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')

comment_count = webpage.count('class="media-comment-contents"')
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex(
(r'"media-meta-member">\s+<a href="/m/([^"]+)"',
r'<span\b[^>]+\bclass="username">([^<]+)</span>'),
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
webpage, 'uploader_id', fatal=False)

categories = self._html_search_meta('keywords', webpage, default=None)
if categories:
categories = [cat.strip() for cat in categories.split(',')]
@@ -217,23 +218,23 @@ class MotherlessGroupIE(InfoExtractor):
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
page_count = self._int(self._search_regex(
r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">',
webpage, 'page_count', default=0), 'page_count')
page_count = str_to_int(self._search_regex(
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
webpage, 'page_count', default=0))
if not page_count:
message = self._search_regex(
r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*',
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
webpage, 'error_msg', default=None) or 'This group has no videos.'
self.report_warning(message, group_id)
page_count = 1
PAGE_SIZE = 80

def _get_page(idx):
if not page_count:
return
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
)
if idx > 0:
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
)
for entry in self._extract_entries(webpage, url):
yield entry


@@ -4,6 +4,7 @@ from ..utils import (
int_or_none,
traverse_obj,
try_get,
urljoin,
)


@@ -147,6 +148,17 @@ class MxplayerIE(InfoExtractor):
'format': 'bv',
'skip_download': True,
},
}, {
'url': 'https://www.mxplayer.in/movie/watch-deewane-huye-paagal-movie-online-4f9175c40a11c3994182a65afdd37ec6?watch=true',
'info_dict': {
'id': '4f9175c40a11c3994182a65afdd37ec6',
'display_id': 'watch-deewane-huye-paagal-movie-online',
'title': 'Deewane Huye Paagal',
'duration': 9037,
'ext': 'mp4',
'description': 'md5:d17bd5c651016c4ed2e6f8a4ace15534',
},
'params': {'skip_download': 'm3u8'},
}]

def _real_extract(self, url):
@@ -157,21 +169,24 @@ class MxplayerIE(InfoExtractor):
data_json = self._download_json(
f'https://api.mxplay.com/v1/web/detail/video?type={video_type}&id={video_id}', display_id)

streams = traverse_obj(data_json, ('stream', {'m3u8': ('hls', 'high'), 'mpd': ('dash', 'high')}))
formats, dash_subs = self._extract_mpd_formats_and_subtitles(
f'https://llvod.mxplay.com/{streams["mpd"]}', display_id, fatal=False)
hls_frmts, hls_subs = self._extract_m3u8_formats_and_subtitles(
f'https://llvod.mxplay.com/{streams["m3u8"]}', display_id, fatal=False)

formats.extend(hls_frmts)
self._sort_formats(formats)
formats, subtitles = [], {}
m3u8_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
data_json, ('stream', (('thirdParty', 'hlsUrl'), ('hls', 'high'))), get_all=False))
if m3u8_url:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, display_id, 'mp4', fatal=False)
mpd_url = urljoin('https://llvod.mxplay.com/', traverse_obj(
data_json, ('stream', (('thirdParty', 'dashUrl'), ('dash', 'high'))), get_all=False))
if mpd_url:
fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, display_id, fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)

season = traverse_obj(data_json, ('container', 'title'))
return {
'id': video_id,
'title': data_json.get('title'),
'formats': formats,
'subtitles': self._merge_subtitles(dash_subs, hls_subs),
'subtitles': subtitles,
'display_id': display_id,
'duration': data_json.get('duration'),
'series': traverse_obj(data_json, ('container', 'container', 'title')),

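The nested tuple (('thirdParty', 'hlsUrl'), ('hls', 'high')) in the new Mxplayer code is traverse_obj's alternate-path form: each inner tuple is tried as its own path, and with get_all=False the first path that yields a value wins, which is how the third-party URL takes priority over the default one. A quick sketch, again assuming yt-dlp's utils API:

from yt_dlp.utils import traverse_obj

data_json = {'stream': {'hls': {'high': 'hls/main/high.m3u8'}}}
# ('thirdParty', 'hlsUrl') is missing here, so the ('hls', 'high')
# branch supplies the result
m3u8_path = traverse_obj(
    data_json, ('stream', (('thirdParty', 'hlsUrl'), ('hls', 'high'))),
    get_all=False)
assert m3u8_path == 'hls/main/high.m3u8'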
@@ -1,12 +1,26 @@
import itertools
import json
import re
import time
from base64 import b64encode
from binascii import hexlify
from datetime import datetime
from hashlib import md5
from random import randint

from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_urlencode
from ..utils import float_or_none, sanitized_Request
from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
bytes_to_intlist,
error_to_compat_str,
float_or_none,
int_or_none,
intlist_to_bytes,
sanitized_Request,
try_get,
)


class NetEaseMusicBaseIE(InfoExtractor):
@@ -17,7 +31,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
@classmethod
def _encrypt(cls, dfsid):
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
string_bytes = bytearray(str(dfsid).encode('ascii'))
salt_len = len(salt_bytes)
for i in range(len(string_bytes)):
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
@@ -26,32 +40,105 @@ class NetEaseMusicBaseIE(InfoExtractor):
result = b64encode(m.digest()).decode('ascii')
return result.replace('/', '_').replace('+', '-')

def make_player_api_request_data_and_headers(self, song_id, bitrate):
KEY = b'e82ckenh8dichen8'
URL = '/api/song/enhance/player/url'
now = int(time.time() * 1000)
rand = randint(0, 1000)
cookie = {
'osver': None,
'deviceId': None,
'appver': '8.0.0',
'versioncode': '140',
'mobilename': None,
'buildver': '1623435496',
'resolution': '1920x1080',
'__csrf': '',
'os': 'pc',
'channel': None,
'requestId': '{0}_{1:04}'.format(now, rand),
}
request_text = json.dumps(
{'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
separators=(',', ':'))
message = 'nobody{0}use{1}md5forencrypt'.format(
URL, request_text).encode('latin1')
msg_digest = md5(message).hexdigest()

data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
URL, request_text, msg_digest)
data = pkcs7_padding(bytes_to_intlist(data))
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
encrypted_params = hexlify(encrypted).decode('ascii').upper()

cookie = '; '.join(
['{0}={1}'.format(k, v if v is not None else 'undefined')
for [k, v] in cookie.items()])

headers = {
'User-Agent': self.extractor.get_param('http_headers')['User-Agent'],
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': 'https://music.163.com',
'Cookie': cookie,
}
return ('params={0}'.format(encrypted_params), headers)
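make_player_api_request_data_and_headers implements NetEase's "eapi" request signing: the JSON payload is digested with a fixed md5 sentence, joined to the API path with '-36cd479b6b5-' separators, PKCS#7-padded and AES-128-ECB-encrypted under a static key, then hex-uppercased into a 'params=' form body. A standalone sketch of the same transform using the third-party cryptography package instead of yt-dlp's internal AES helpers; the empty header dict is a simplification (the real request embeds the cookie dict shown above):

import json
from hashlib import md5

from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

EAPI_KEY = b'e82ckenh8dichen8'          # static key, as in the extractor
API_PATH = '/api/song/enhance/player/url'

def eapi_params(song_id, bitrate):
    request_text = json.dumps(
        {'ids': f'[{song_id}]', 'br': bitrate, 'header': {}},
        separators=(',', ':'))
    digest = md5(f'nobody{API_PATH}use{request_text}md5forencrypt'
                 .encode('latin1')).hexdigest()
    data = f'{API_PATH}-36cd479b6b5-{request_text}-36cd479b6b5-{digest}'.encode()
    pad = 16 - len(data) % 16           # PKCS#7 padding to the AES block size
    data += bytes([pad]) * pad
    encryptor = Cipher(algorithms.AES(EAPI_KEY), modes.ECB()).encryptor()
    return (encryptor.update(data) + encryptor.finalize()).hex().upper()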

def _call_player_api(self, song_id, bitrate):
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
try:
msg = 'empty result'
result = self._download_json(
url, song_id, data=data.encode('ascii'), headers=headers)
if result:
return result
except ExtractorError as e:
if type(e.cause) in (ValueError, TypeError):
# JSON load failure
raise
except Exception as e:
msg = error_to_compat_str(e)
self.report_warning('%s API call (%s) failed: %s' % (
song_id, bitrate, msg))
return {}

def extract_formats(self, info):
err = 0
formats = []
song_id = info['id']
for song_format in self._FORMATS:
details = info.get(song_format)
if not details:
continue
song_file_path = '/%s/%s.%s' % (
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])

# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
# from NetEase's CDN provider that can be used if m5.music.126.net does not
# work, especially for users outside of Mainland China
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
song_url = host + song_file_path
bitrate = int_or_none(details.get('bitrate')) or 999000
data = self._call_player_api(song_id, bitrate)
for song in try_get(data, lambda x: x['data'], list) or []:
song_url = try_get(song, lambda x: x['url'])
if not song_url:
continue
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
'ext': details.get('extension'),
'abr': float_or_none(details.get('bitrate'), scale=1000),
'abr': float_or_none(song.get('br'), scale=1000),
'format_id': song_format,
'filesize': details.get('size'),
'asr': details.get('sr')
'filesize': int_or_none(song.get('size')),
'asr': int_or_none(details.get('sr')),
})
break
elif err == 0:
err = try_get(song, lambda x: x['code'], int)

if not formats:
msg = 'No media links found'
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(
'%s (site code %d)' % (msg, err, ), expected=True)
else:
self.raise_geo_restricted(
msg + ': probably this video is not available from your location due to geo restriction.',
countries=['CN'])

return formats

@classmethod
@@ -67,33 +154,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song'
IE_DESC = '网易云音乐'
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://music.163.com/#/song?id=32102397',
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
'md5': '3e909614ce09b1ccef4a3eb205441190',
'info_dict': {
'id': '32102397',
'ext': 'mp3',
'title': 'Bad Blood (feat. Kendrick Lamar)',
'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar',
'upload_date': '20150517',
'timestamp': 1431878400,
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
'upload_date': '20150516',
'timestamp': 1431792000,
'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics translation.',
'url': 'http://music.163.com/#/song?id=29822014',
'info_dict': {
'id': '29822014',
'ext': 'mp3',
'title': '听见下雨的声音',
'creator': '周杰伦',
'upload_date': '20141225',
'timestamp': 1419523200,
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424',
@@ -103,9 +176,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211',
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
'timestamp': 1202745600,
},
'skip': 'Blocked outside Mainland China',
}, {
'note': 'Has translated name.',
'url': 'http://music.163.com/#/song?id=22735043',
@@ -119,7 +192,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'timestamp': 1264608000,
'alt_title': '说出愿望吧(Genie)',
},
'skip': 'Blocked outside Mainland China',
}, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
'md5': '95826c73ea50b1c288b22180ec9e754d',
'info_dict': {
'id': '95670',
'ext': 'mp3',
'title': '国际歌',
'creator': '马备',
'upload_date': '19911130',
'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
},
}]

def _process_lyrics(self, lyrics_info):

@@ -321,8 +321,7 @@ class NhkForSchoolProgramListIE(InfoExtractor):

webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)

title = (self._og_search_title(webpage)
or self._html_extract_title(webpage)
title = (self._generic_title('', webpage)
or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
description = self._html_search_regex(

@@ -231,7 +231,7 @@ class NiconicoIE(InfoExtractor):
or self._parse_json(
self._html_search_regex(
'data-api-data="([^"]+)"',
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
'API data', default='{}'),
video_id))

@@ -390,7 +390,7 @@ class NiconicoIE(InfoExtractor):

try:
webpage, handle = self._download_webpage_handle(
'http://www.nicovideo.jp/watch/' + video_id, video_id)
'https://www.nicovideo.jp/watch/' + video_id, video_id)
if video_id.startswith('so'):
video_id = self._match_id(handle.geturl())

@@ -728,7 +728,7 @@ class NicovideoSearchBaseIE(InfoExtractor):
webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
for item in results:
yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item)
if not results:
break


95
yt_dlp/extractor/nosnl.py
Normal file
@@ -0,0 +1,95 @@
from .common import InfoExtractor
from ..utils import parse_duration, parse_iso8601, traverse_obj


class NOSNLArticleIE(InfoExtractor):
_VALID_URL = r'https?://nos\.nl/((?!video)(\w+/)?\w+/)\d+-(?P<display_id>[\w-]+)'
_TESTS = [
{
# only 1 video
'url': 'https://nos.nl/nieuwsuur/artikel/2440353-verzakking-door-droogte-dreigt-tot-een-miljoen-kwetsbare-huizen',
'info_dict': {
'id': '2440340',
'ext': 'mp4',
'description': 'md5:5f83185d902ac97af3af4bed7ece3db5',
'title': '\'We hebben een huis vol met scheuren\'',
'duration': 95.0,
'thumbnail': 'https://cdn.nos.nl/image/2022/08/12/887149/3840x2160a.jpg',
}
}, {
# more than 1 video
'url': 'https://nos.nl/artikel/2440409-vannacht-sliepen-weer-enkele-honderden-asielzoekers-in-ter-apel-buiten',
'info_dict': {
'id': '2440409',
'title': 'Vannacht sliepen weer enkele honderden asielzoekers in Ter Apel buiten',
'description': 'Er werd wel geprobeerd om kwetsbare migranten onderdak te bieden, zegt het COA.',
'tags': ['aanmeldcentrum', 'Centraal Orgaan opvang asielzoekers', 'COA', 'asielzoekers', 'Ter Apel'],
'modified_timestamp': 1660452773,
'modified_date': '20220814',
'upload_date': '20220813',
'thumbnail': 'https://cdn.nos.nl/image/2022/07/18/880346/1024x576a.jpg',
'timestamp': 1660401384,
},
'playlist_count': 2,
}, {
# audio + video
'url': 'https://nos.nl/artikel/2440789-wekdienst-16-8-groningse-acties-tien-jaar-na-zware-aardbeving-femke-bol-in-actie-op-ek-atletiek',
'info_dict': {
'id': '2440789',
'title': 'Wekdienst 16/8: Groningse acties tien jaar na zware aardbeving • Femke Bol in actie op EK atletiek ',
'description': 'Nieuws, weer, verkeer: met dit overzicht begin je geïnformeerd aan de dag.',
'tags': ['wekdienst'],
'modified_date': '20220816',
'modified_timestamp': 1660625449,
'timestamp': 1660625449,
'upload_date': '20220816',
'thumbnail': 'https://cdn.nos.nl/image/2022/08/16/888178/1024x576a.jpg',
},
'playlist_count': 2,
}
]

def _entries(self, nextjs_json, display_id):
for item in nextjs_json['items']:
if item.get('type') == 'video':
formats, subtitle = self._extract_m3u8_formats_and_subtitles(
traverse_obj(item, ('source', 'url')), display_id, ext='mp4')
yield {
'id': str(item['id']),
'title': item.get('title'),
'description': item.get('description'),
'formats': formats,
'subtitles': subtitle,
'duration': parse_duration(item.get('duration')),
'thumbnails': [{
'url': traverse_obj(image, ('url', ...), get_all=False),
'width': image.get('width'),
'height': image.get('height')
} for image in traverse_obj(item, ('imagesByRatio', ...))[0]],
}

elif item.get('type') == 'audio':
yield {
'id': str(item['id']),
'title': item.get('title'),
'url': traverse_obj(item, ('media', 'src')),
'ext': 'mp3',
}

def _real_extract(self, url):
display_id = self._match_valid_url(url).group('display_id')
webpage = self._download_webpage(url, display_id)

nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data']
return {
'_type': 'playlist',
'entries': self._entries(nextjs_json, display_id),
'id': str(nextjs_json['id']),
'title': nextjs_json.get('title') or self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
'description': (nextjs_json.get('description')
or self._html_search_meta(['description', 'twitter:description', 'og:description'], webpage)),
'tags': nextjs_json.get('keywords'),
'modified_timestamp': parse_iso8601(nextjs_json.get('modifiedAt')),
'thumbnail': nextjs_json.get('shareImageSrc') or self._html_search_meta(['og:image', 'twitter:image'], webpage),
'timestamp': parse_iso8601(nextjs_json.get('publishedAt'))
}
@@ -58,8 +58,7 @@ class NRKBaseIE(InfoExtractor):
return self._download_json(
urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'})
fatal=fatal, query=query)


class NRKIE(NRKBaseIE):

@@ -1,6 +1,7 @@
import json

from .brightcove import BrightcoveNewIE
from .common import InfoExtractor

from ..compat import compat_str
from ..utils import (
ExtractorError,
@@ -13,17 +14,20 @@ class NZHeraldIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nzherald\.co\.nz/[\w\/-]+\/(?P<id>[A-Z0-9]+)'
_TESTS = [
{
'url': 'https://www.nzherald.co.nz/nz/weather-heavy-rain-gales-across-nz-most-days-this-week/PTG7QWY4E2225YHZ5NAIRBTYTQ/',
# Video accessible under 'video' key
'url': 'https://www.nzherald.co.nz/nz/queen-elizabeth-death-nz-public-holiday-announced-for-september-26/CEOPBSXO2JDCLNK3H7E3BIE2FA/',
'info_dict': {
'id': '6271084466001',
'id': '6312191736112',
'ext': 'mp4',
'title': 'MetService severe weather warning: September 6th - 7th',
'timestamp': 1630891576,
'upload_date': '20210906',
'title': 'Focus: PM holds post-Cabinet press conference',
'duration': 238.08,
'upload_date': '20220912',
'uploader_id': '1308227299001',
'description': 'md5:db6ca335a22e2cdf37ab9d2bcda52902'
'timestamp': 1662957159,
'tags': [],
'thumbnail': r're:https?://.*\.jpg$',
'description': 'md5:2f17713fcbfcfbe38bb9e7dfccbb0f2e',
}

}, {
# Webpage has brightcove embed player url
'url': 'https://www.nzherald.co.nz/travel/pencarrow-coastal-trail/HDVTPJEPP46HJ2UEMK4EGD2DFI/',
@@ -34,9 +38,11 @@ class NZHeraldIE(InfoExtractor):
'timestamp': 1625102897,
'upload_date': '20210701',
'uploader_id': '1308227299001',
'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4'
'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4',
'thumbnail': r're:https?://.*\.jpg$',
'tags': ['travel', 'video'],
'duration': 43.627,
}

}, {
# two video embeds of the same video
'url': 'https://www.nzherald.co.nz/nz/truck-driver-captured-cutting-off-motorist-on-state-highway-1-in-canterbury/FIHNJB7PLLPHWQPK4S7ZBDUC4I/',
@@ -48,6 +54,22 @@ class NZHeraldIE(InfoExtractor):
'upload_date': '20210429',
'uploader_id': '1308227299001',
'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7'
},
'skip': 'video removed',
}, {
# customVideo embed requiring additional API call
'url': 'https://www.nzherald.co.nz/nz/politics/reserve-bank-rejects-political-criticisms-stands-by-review/2JO5Q4WLZRCBBNWTLACZMOP4RA/',
'info_dict': {
'id': '6315123873112',
'ext': 'mp4',
'timestamp': 1667862725,
'title': 'Focus: Luxon on re-appointment of Reserve Bank governor Adrian Orr',
'upload_date': '20221107',
'description': 'md5:df2f1f7033a8160c66e28e4743f5d934',
'uploader_id': '1308227299001',
'tags': ['video', 'nz herald focus', 'politics', 'politics videos'],
'thumbnail': r're:https?://.*\.jpg$',
'duration': 99.584,
}
}, {
'url': 'https://www.nzherald.co.nz/kahu/kaupapa-companies-my-taiao-supporting-maori-in-study-and-business/PQBO2J25WCG77VGRX7W7BVYEAI/',
@@ -80,6 +102,12 @@ class NZHeraldIE(InfoExtractor):
self._search_regex(r'Fusion\.globalContent\s*=\s*({.+?})\s*;', webpage, 'fusion metadata'), article_id)

video_metadata = fusion_metadata.get('video')
if not video_metadata:
custom_video_id = traverse_obj(fusion_metadata, ('customVideo', 'embed', 'id'), expected_type=str)
if custom_video_id:
video_metadata = self._download_json(
'https://www.nzherald.co.nz/pf/api/v3/content/fetch/full-content-by-id', article_id,
query={'query': json.dumps({'id': custom_video_id, 'site': 'nzh'}), '_website': 'nzh'})
bc_video_id = traverse_obj(
video_metadata or fusion_metadata, # fusion metadata is the video metadata for video-only pages
'brightcoveId', ('content_elements', ..., 'referent', 'id'),

@@ -8,10 +8,12 @@ from ..compat import (
from ..utils import (
ExtractorError,
float_or_none,
unified_strdate,
int_or_none,
qualities,
smuggle_url,
unescapeHTML,
unified_strdate,
unsmuggle_url,
urlencode_postdata,
)

@@ -22,7 +24,7 @@ class OdnoklassnikiIE(InfoExtractor):
(?:(?:www|m|mobile)\.)?
(?:odnoklassniki|ok)\.ru/
(?:
video(?:embed)?/|
video(?P<embed>embed)?/|
web-api/video/moviePlayer/|
live/|
dk\?.*?st\.mvId=
@@ -38,7 +40,7 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'timestamp': 1545580896,
'view_count': int,
'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
'title': 'Народная забава',
'uploader': 'Nevata',
'upload_date': '20181223',
@@ -65,11 +67,12 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
'md5': '0b62089b479e06681abaaca9d204f152',
'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
'thumbnail': str,
'duration': 100,
'upload_date': '20141207',
'uploader_id': '330537914540',
@@ -80,11 +83,12 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
'md5': 'f8c951122516af72e6e6ffdd3c41103b',
'info_dict': {
'id': '63567059965189-0',
'ext': 'mp4',
'title': 'Девушка без комплексов ...',
'thumbnail': str,
'duration': 191,
'upload_date': '20150518',
'uploader_id': '534380003155',
@@ -95,18 +99,32 @@ class OdnoklassnikiIE(InfoExtractor):
},
}, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
'url': 'http://ok.ru/video/64211978996595-1',
'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
'url': 'https://ok.ru/video/3952212382174',
'md5': '91749d0bd20763a28d083fa335bbd37a',
'info_dict': {
'id': 'V_VztHT5BzY',
'id': '5axVgHHDBvU',
'ext': 'mp4',
'title': 'Космическая среда от 26 августа 2015',
'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
'duration': 440,
'upload_date': '20150826',
'uploader_id': 'tvroscosmos',
'uploader': 'Телестудия Роскосмоса',
'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
'uploader': 'Lod Mer',
'uploader_id': '575186401502',
'duration': 1529,
'age_limit': 0,
'upload_date': '20210405',
'comment_count': int,
'live_status': 'not_live',
'view_count': int,
'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
'channel_follower_count': int,
'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
'like_count': int,
'availability': 'public',
'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
'categories': ['Education'],
'playable_in_embed': True,
'channel': 'BornToReact',
},
}, {
# YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
@@ -126,10 +144,12 @@ class OdnoklassnikiIE(InfoExtractor):
},
'skip': 'Video has not been found',
}, {
# TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
'note': 'Only available in mobile webpage',
'url': 'https://m.ok.ru/video/2361249957145',
'info_dict': {
'id': '2361249957145',
'ext': 'mp4',
'title': 'Быковское крещение',
'duration': 3038.181,
},
@@ -158,8 +178,37 @@ class OdnoklassnikiIE(InfoExtractor):
# Paid video
'url': 'https://ok.ru/video/954886983203',
'only_matching': True,
}, {
'url': 'https://ok.ru/videoembed/2932705602075',
'info_dict': {
'id': '2932705602075',
'ext': 'mp4',
'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
'title': 'Boosty для тебя!',
'uploader_id': '597811038747',
'like_count': 0,
'duration': 35,
},
}]

_WEBPAGE_TESTS = [{
'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
'info_dict': {
'id': '3950343629563',
'ext': 'mp4',
'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
'title': 'Заяц Бусти.mp4',
'uploader_id': '571368965883',
'like_count': 0,
'duration': 10444,
},
}]

@classmethod
def _extract_embed_urls(cls, url, webpage):
for x in super()._extract_embed_urls(url, webpage):
yield smuggle_url(x, {'referrer': url})

def _real_extract(self, url):
try:
return self._extract_desktop(url)
@@ -174,16 +223,23 @@ class OdnoklassnikiIE(InfoExtractor):
start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

video_id = self._match_id(url)
url, smuggled = unsmuggle_url(url, {})
video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
mode = 'videoembed' if is_embed else 'video'

webpage = self._download_webpage(
'http://ok.ru/video/%s' % video_id, video_id,
note='Downloading desktop webpage')
f'https://ok.ru/{mode}/{video_id}', video_id,
note='Downloading desktop webpage',
headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})

error = self._search_regex(
r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
webpage, 'error', default=None)
if error:
# Direct link from boosty
if (error == 'The author of this video has not been found or is blocked'
and not smuggled.get('referrer') and mode == 'videoembed'):
return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
elif error:
raise ExtractorError(error, expected=True)

player = self._parse_json(
@@ -270,7 +326,7 @@ class OdnoklassnikiIE(InfoExtractor):
if provider == 'LIVE_TV_APP':
info['title'] = title

quality = qualities(('4', '0', '1', '2', '3', '5'))
quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

formats = [{
'url': f['url'],

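The smuggle/unsmuggle pair introduced above is how this extractor carries the embedding page's URL into _real_extract, so boosty.to embeds can retry with a synthetic referrer. The helpers pack a JSON dict into the URL fragment and pop it back out; a quick round-trip, assuming yt-dlp's utils API (the fragment format in the comment is approximate):

from yt_dlp.utils import smuggle_url, unsmuggle_url

smuggled = smuggle_url(
    'https://ok.ru/videoembed/2932705602075', {'referrer': 'https://boosty.to'})
# the data rides along in the URL fragment (roughly '#__youtubedl_smuggle=...')
url, data = unsmuggle_url(smuggled, {})
assert url == 'https://ok.ru/videoembed/2932705602075'
assert data == {'referrer': 'https://boosty.to'}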
54
yt_dlp/extractor/oftv.py
Normal file
@@ -0,0 +1,54 @@
from .common import InfoExtractor
from .zype import ZypeIE
from ..utils import traverse_obj


class OfTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?of.tv/video/(?P<id>\w+)'
_TESTS = [{
'url': 'https://of.tv/video/627d7d95b353db0001dadd1a',
'md5': 'cb9cd5db3bb9ee0d32bfd7e373d6ef0a',
'info_dict': {
'id': '627d7d95b353db0001dadd1a',
'ext': 'mp4',
'title': 'E1: Jacky vs Eric',
'thumbnail': r're:^https?://.*\.jpg',
'average_rating': 0,
'description': 'md5:dd16e3e2a8d27d922e7a989f85986853',
'display_id': '',
'duration': 1423,
'timestamp': 1652391300,
'upload_date': '20220512',
'view_count': 0,
'creator': 'This is Fire'
}
}]

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info = next(ZypeIE.extract_from_webpage(self._downloader, url, webpage))
info['_type'] = 'url_transparent'
info['creator'] = self._search_regex(r'<a[^>]+class=\"creator-name\"[^>]+>([^<]+)', webpage, 'creator')
return info


class OfTVPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?of.tv/creators/(?P<id>[a-zA-Z0-9-]+)/.?'
_TESTS = [{
'url': 'https://of.tv/creators/this-is-fire/',
'playlist_count': 8,
'info_dict': {
'id': 'this-is-fire'
}
}]

def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)

json_match = self._search_json(
r'var\s*remaining_videos\s*=', webpage, 'oftv playlists', playlist_id, contains_pattern=r'\[.+\]')

return self.playlist_from_matches(
traverse_obj(json_match, (..., 'discovery_url')), playlist_id)
@@ -106,7 +106,6 @@ class OneNewsNZIE(InfoExtractor):

playlist_title = (
traverse_obj(fusion_metadata, ('headlines', 'basic'))
or self._og_search_title(webpage)
or self._html_extract_title(webpage)
or self._generic_title('', webpage)
)
return self.playlist_result(entries, display_id, playlist_title)

@@ -40,7 +40,6 @@ class ParamountPlusIE(CBSBaseIE):
'params': {
'skip_download': 'm3u8',
},
'expected_warnings': ['Ignoring subtitle tracks'], # TODO: Investigate this
}, {
'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/',
'info_dict': {
@@ -63,7 +62,6 @@ class ParamountPlusIE(CBSBaseIE):
'params': {
'skip_download': 'm3u8',
},
'expected_warnings': ['Ignoring subtitle tracks'],
}, {
'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/',
'info_dict': {
@@ -118,8 +116,11 @@ class ParamountPlusIE(CBSBaseIE):

def _extract_video_info(self, content_id, mpx_acc=2198311517):
items_data = self._download_json(
'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/%s.json' % content_id,
content_id, query={'locale': 'en-us', 'at': 'ABCqWNNSwhIqINWIIAG+DFzcFUvF8/vcN6cNyXFFfNzWAIvXuoVgX+fK4naOC7V8MLI='}, headers=self.geo_verification_headers())
f'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/{content_id}.json',
content_id, query={
'locale': 'en-us',
'at': 'ABCXgPuoStiPipsK0OHVXIVh68zNys+G4f7nW9R6qH68GDOcneW6Kg89cJXGfiQCsj0=',
}, headers=self.geo_verification_headers())

asset_types = {
item.get('assetType'): {

75
yt_dlp/extractor/podbayfm.py
Normal file
75
yt_dlp/extractor/podbayfm.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
|
||||
|
||||
|
||||
def result_from_props(props, episode_id=None):
|
||||
return {
|
||||
'id': props.get('podcast_id') or episode_id,
|
||||
'title': props.get('title'),
|
||||
'url': props['mediaURL'],
|
||||
'ext': 'mp3',
|
||||
'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
|
||||
'timestamp': props.get('timestamp'),
|
||||
'duration': int_or_none(props.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
class PodbayFMIE(InfoExtractor):
    _VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
        'md5': '98b41285dcf7989d105a4ed0404054cf',
        'info_dict': {
            'id': '1647338400',
            'title': 'Part One: Kissinger',
            'ext': 'mp3',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1647338400,
            'duration': 5001,
            'upload_date': '20220315',
        },
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)
        data = self._search_nextjs_data(webpage, episode_id)
        return result_from_props(data['props']['pageProps']['episode'], episode_id)
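
`_search_nextjs_data` does the heavy lifting here: Next.js pages embed their full state in a `__NEXT_DATA__` script tag, so the episode metadata never has to be scraped out of the visible HTML. A rough stand-alone equivalent (simplified; yt-dlp's helper is more tolerant of attribute order and malformed markup):

import json
import re

# Hypothetical, heavily trimmed page body.
webpage = ('<script id="__NEXT_DATA__" type="application/json">'
           '{"props": {"pageProps": {"episode": {"title": "Part One: Kissinger"}}}}'
           '</script>')
nextjs_data = json.loads(re.search(
    r'<script[^>]+id="__NEXT_DATA__"[^>]*>\s*({.+?})\s*</script>', webpage).group(1))
print(nextjs_data['props']['pageProps']['episode']['title'])
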
class PodbayFMChannelIE(InfoExtractor):
    _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards',
        'info_dict': {
            'id': 'behind-the-bastards',
            'title': 'Behind the Bastards',
        },
    }]
    _PAGE_SIZE = 10

    def _fetch_page(self, channel_id, pagenum):
        return self._download_json(
            f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
            channel_id)['podcast']

    @staticmethod
    def _results_from_page(channel_id, page):
        return [{
            **result_from_props(e),
            'extractor': PodbayFMIE.IE_NAME,
            'extractor_key': PodbayFMIE.ie_key(),
            # somehow they use timestamps as the episode identifier
            'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}',
        } for e in page['episodes']]

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        first_page = self._fetch_page(channel_id, 0)
        entries = OnDemandPagedList(
            lambda pagenum: self._results_from_page(
                channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
            self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id, first_page.get('title'))
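
The channel extractor leans on `OnDemandPagedList` so only the pages a user actually requests (e.g. via `--playlist-items`) are fetched, and page 0 is reused since it was already downloaded for the channel title. A small sketch of that laziness with a fake fetcher in place of the podbay API call:

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 10

def fake_fetch_page(pagenum):  # stands in for the real API request
    print(f'fetching page {pagenum}')
    return [f'episode-{pagenum * PAGE_SIZE + i}' for i in range(PAGE_SIZE)]

entries = OnDemandPagedList(fake_fetch_page, PAGE_SIZE)
print(entries.getslice(0, 3))  # only page 0 is ever fetched
# fetching page 0
# -> ['episode-0', 'episode-1', 'episode-2']
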
@@ -21,6 +21,23 @@ class PrankCastIE(InfoExtractor):
            'tags': ['prank call', 'prank'],
            'upload_date': '20220825'
        }
+    }, {
+        'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL',
+        'info_dict': {
+            'id': '2048',
+            'ext': 'mp3',
+            'title': 'NOT COOL',
+            'display_id': 'NOT-COOL',
+            'timestamp': 1665028364,
+            'uploader': 'phonelosers',
+            'channel_id': 6,
+            'duration': 4044,
+            'cast': ['phonelosers'],
+            'description': '',
+            'categories': ['prank'],
+            'tags': ['prank call', 'prank'],
+            'upload_date': '20221006'
+        }
    }]

    def _real_extract(self, url):
47 yt_dlp/extractor/qingting.py Normal file
@@ -0,0 +1,47 @@
from .common import InfoExtractor
from ..utils import traverse_obj


class QingTingIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.|m\.)?(?:qingting\.fm|qtfm\.cn)/v?channels/(?P<channel>\d+)/programs/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
        'md5': '47e6a94f4e621ed832c316fd1888fb3c',
        'info_dict': {
            'id': '22257411',
            'title': '用了十年才修改,谁在乎教科书?',
            'channel_id': '378005',
            'channel': '睡前消息',
            'uploader': '马督工',
            'ext': 'm4a',
        }
    }, {
        'url': 'https://m.qtfm.cn/vchannels/378005/programs/23023573/',
        'md5': '2703120b6abe63b5fa90b975a58f4c0e',
        'info_dict': {
            'id': '23023573',
            'title': '【睡前消息488】重庆山火之后,有图≠真相',
            'channel_id': '378005',
            'channel': '睡前消息',
            'uploader': '马督工',
            'ext': 'm4a',
        }
    }]

    def _real_extract(self, url):
        channel_id, pid = self._match_valid_url(url).group('channel', 'id')
        webpage = self._download_webpage(
            f'https://m.qtfm.cn/vchannels/{channel_id}/programs/{pid}/', pid)
        info = self._search_json(r'window\.__initStores\s*=', webpage, 'program info', pid)
        return {
            'id': pid,
            'title': traverse_obj(info, ('ProgramStore', 'programInfo', 'title')),
            'channel_id': channel_id,
            'channel': traverse_obj(info, ('ProgramStore', 'channelInfo', 'title')),
            'uploader': traverse_obj(info, ('ProgramStore', 'podcasterInfo', 'podcaster', 'nickname')),
            'url': traverse_obj(info, ('ProgramStore', 'programInfo', 'audioUrl')),
            'vcodec': 'none',
            'acodec': 'm4a',
            'ext': 'm4a',
        }
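
As with podbay's Next.js payload, everything here comes from one embedded JSON blob: `_search_json` anchors on the `window.__initStores =` assignment and parses the object that follows (the real helper also balances braces and tolerates trailing junk). A simplified stand-alone version of the same idea:

import json
import re

# Hypothetical, heavily trimmed page body.
webpage = ('window.__initStores = {"ProgramStore": {"programInfo": '
           '{"title": "demo program", "audioUrl": "https://example.com/a.m4a"}}};')
info = json.loads(re.search(
    r'window\.__initStores\s*=\s*({.+})\s*;', webpage).group(1))
print(info['ProgramStore']['programInfo']['audioUrl'])
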
Some files were not shown because too many files have changed in this diff