diff --git a/README.md b/README.md index 634cff5a5..00ed6b7dd 100644 --- a/README.md +++ b/README.md @@ -301,6 +301,7 @@ ## General Options: error occurs (Alias: --no-ignore-errors) --dump-user-agent Display the current user-agent and exit --list-extractors List all supported extractors and exit + --list-extractors-json List all supported extractors in JSON and exit --extractor-descriptions Output descriptions of all supported extractors and exit --use-extractors NAMES Extractor names to use separated by commas. @@ -1870,7 +1871,21 @@ #### sonylivseries +# EXTRACTOR INFO JSON +The `--list-extractors-json` option outputs information about the extractor(s), formatted as JSON. If URL(s) are specified, only the extractors matching at least one URL are listed. If none is specified, all extractors are listed. The generic extractor is always the last in the list. +### List of values returned +key | type | description +:------------|:----------------|:---------------------------- +index | int | index in list, starting from 0 +name | string | name of the extractor +desc | string | description of the extractor +working | bool | true if the extractor is working +enabled | bool | true if the extractor is enabled +return_type | string | type of data returned by the extractor ("video", "playlist", "any", or None) +regex_urls | array of string | list of regexes used by the extractor to match a given url +matched_urls | array of string | list of url(s) passed on the command line that matched the given extractor. Present only if URL(s) are specified. + # PLUGINS Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. 
**Use plugins at your own risk and only if you trust the code!** diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 7d8f10047..7844065d3 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -13,6 +13,7 @@ import os import re import traceback +import json from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .downloader.external import get_external_downloader @@ -118,6 +119,45 @@ def print_extractor_information(opts, urls): out = 'Supported TV Providers:\n{}\n'.format(render_table( ['mso', 'mso name'], [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])) + elif opts.list_extractors_json: + from .extractor.generic import GenericIE + dicts = [] + e_index = 0 + urls = dict.fromkeys(urls, False) + if len(urls): + for ie in gen_extractors(): + if ie == GenericIE: + matched_urls = [url for url, matched in urls.items() if not matched] + else: + matched_urls = tuple(filter(ie.suitable, urls.keys())) + urls.update(dict.fromkeys(matched_urls, True)) + # show only extractor with matched URL + if len(matched_urls): + data = {'index': e_index, + 'name': ie.IE_NAME, + 'desc': ie.IE_DESC if ie.IE_DESC else '', + 'working': ie.working(), + 'enabled': ie.is_enabled(), + 'return_type': ie.return_type(), + 'regex_urls': ie.list_regex_url(), + 'matched_urls': matched_urls, + } + e_index += 1 + dicts.append(data) + else: + # show all extractors + for ie in gen_extractors(): + data = {'index': e_index, + 'name': ie.IE_NAME, + 'desc': ie.IE_DESC if ie.IE_DESC else '', + 'working': ie.working(), + 'enabled': ie.is_enabled(), + 'return_type': ie.return_type(), + 'regex_urls': ie.list_regex_url(), + } + dicts.append(data) + e_index += 1 + out = json.dumps(dicts, indent=4) else: return False write_string(out, out=sys.stdout) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b816d788f..e7cba37df 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -13,6 +13,7 @@ import os import random 
import re
+import string
 import subprocess
 import sys
 import time
@@ -615,6 +616,39 @@ def suitable(cls, url):
         # so that lazy_extractors works correctly
         return cls._match_valid_url(url) is not None
 
+    @classmethod
+    def list_regex_url(cls):
+        """Return this extractor's URL patterns as a list of strings.
+
+        A list or tuple ``_VALID_URL`` is copied into a list unchanged. A single
+        string pattern has all ASCII whitespace removed and is wrapped in a
+        one-element list. Any other value yields an empty list.
+        """
+        valid_url = cls._VALID_URL
+        if isinstance(valid_url, (list, tuple)):
+            return list(valid_url)
+        if isinstance(valid_url, str):
+            # NOTE(review): this also removes whitespace that is significant to
+            # the regex (e.g. inside a character class) - confirm this is intended.
+            return [valid_url.translate({ord(c): None for c in string.whitespace})]
+        return []
+
+    @classmethod
+    def return_type(cls):
+        """Return this class's own ``_RETURN_TYPE``, or None if it defines none.
+
+        Only the class's own ``__dict__`` is checked, so a value inherited
+        from a base class is not reported.
+        """
+        if '_RETURN_TYPE' not in cls.__dict__:
+            return None
+        return cls._RETURN_TYPE
+
+    @classmethod
+    def is_enabled(cls):
+        """Return the extractor's ``_ENABLED`` attribute."""
+        return cls._ENABLED
+
     @classmethod
     def _match_id(cls, url):
         return cls._match_valid_url(url).group('id')
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 91c2635a7..c39c0ce31 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -362,6 +362,10 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
         '--list-extractors',
         action='store_true', dest='list_extractors', default=False,
         help='List all supported extractors and exit')
+    general.add_option(
+        '--list-extractors-json',
+        action='store_true', dest='list_extractors_json', default=False,
+        help='List all supported extractors parameters in JSON format and exit')
     general.add_option(
         '--extractor-descriptions',
         action='store_true', dest='list_extractor_descriptions', default=False,