1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00
yt-dlp/yt_dlp/extractor/tvw.py

120 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError, RegexNotFoundError, clean_html, traverse_obj, unified_timestamp
class TVWIE(InfoExtractor):
    """Extractor for videos published on tvw.org.

    The video page carries ``clientID`` and ``eventID`` meta tags. Those are
    posted to the Invintus ``Event/getDetailed`` endpoint, and the ``data``
    object of the reply supplies the title, description, m3u8 stream URLs,
    thumbnail, captions, start time and location.
    """

    # Used when the key cannot be scraped from the site's app.js bundle.
    BACKUP_API_KEY = '7WhiEBzijpritypp8bqcU7pfU9uicDR'
    # Stop the slug at '?' and '#' as well as '/', so query strings and
    # fragments never end up in the matched id.
    _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
        'md5': '9ceb94fe2bb7fd726f74f16356825703',
        'info_dict': {
            'id': '2024011211',
            'ext': 'mp4',
            'title': 'Billy Frank Jr. Statue Maquette Unveiling Ceremony',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:58a8150017d985b4f377e11ee8f6f36e',
            'timestamp': 1704902400,
            'upload_date': '20240110',
            'location': 'Legislative Building',
        },
    }, {
        'url': 'https://tvw.org/video/ebeys-landing-state-park-2024081007/',
        'md5': '71e87dae3deafd65d75ff3137b9a32fc',
        'info_dict': {
            'id': '2024081007',
            'ext': 'mp4',
            'title': 'Ebey\'s Landing State Park',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:50c5bd73bde32fa6286a008dbc853386',
            'timestamp': 1724310900,
            'upload_date': '20240822',
            'location': 'Ebeys Landing State Park',
        },
    }, {
        'url': 'https://tvw.org/video/home-warranties-workgroup-2',
        'info_dict': {
            'id': '1999121000',
            'ext': 'mp4',
            'title': 'Home Warranties Workgroup',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:861396cc523c9641d0dce690bc5c35f3',
            'timestamp': 946389600,
            'upload_date': '19991228',
        },
    }]

    def _get_subtitles(self, response):
        """Build the subtitles dict from the API response.

        Returns a single English VTT track pointing at ``captionPath``, or an
        empty dict when the response has no caption URL (so that no subtitle
        entry with a missing URL is ever produced).
        """
        caption_url = traverse_obj(response, 'captionPath')
        if not caption_url:
            return {}
        return {'en': [{'ext': 'vtt', 'url': caption_url}]}

    def _get_thumbnail(self, response):
        """Return a one-element thumbnails list built from ``videoThumbnail``.

        Returns an empty list when the response has no thumbnail URL.
        Not called by any other method of this class.
        """
        thumbnail_url = traverse_obj(response, 'videoThumbnail')
        if not thumbnail_url:
            return []
        return [{'url': thumbnail_url}]

    def _get_description(self, response):
        """Return the response's ``description`` passed through ``clean_html``."""
        return clean_html(response.get('description'))

    def _get_js_code(self, video_id, webpage):
        """Download the script referenced by the ``invintus-app-js`` tag.

        The script URL is taken from the ``src`` attribute of the
        ``<script id="invintus-app-js">`` element in *webpage*; the text of
        that script is returned.
        """
        app_js_url = self._html_search_regex(
            r'<script[^>]+src=[\"\'](?P<app_js>.+?)[\"\'][^>]+id=\"invintus-app-js\"',
            webpage, 'app_js')
        return self._download_webpage(app_js_url, video_id, 'Downloading app.js API key')

    def _extract_formats(self, response, video_id, stream_url):
        """Extract m3u8 formats from *stream_url*, falling back to the backup.

        The backup URL is read from ``response['streamingURIs']['backup']``.
        If the main stream fails and no backup URL exists, the original
        failure is re-raised instead of attempting to fetch a missing URL.

        Raises:
            ExtractorError: if neither URL is available, or extraction of
                the last candidate fails.
        """
        backup_url = traverse_obj(response, ('streamingURIs', 'backup'))

        if stream_url:
            try:
                return self._extract_m3u8_formats(stream_url, video_id, 'mp4')
            except ExtractorError:
                self.report_warning(
                    'Failed to parse the m3u8 stream. Falling back to the backup stream if it exists.')
                if not backup_url:
                    # Nothing to fall back to: surface the main-stream error.
                    raise
        elif not backup_url:
            raise ExtractorError('No stream URL found in the API response')

        return self._extract_m3u8_formats(backup_url, video_id, 'mp4')

    def _real_extract(self, url):
        """Extract the info dict for a tvw.org video page.

        Raises:
            ExtractorError: if the required meta tags are missing, the API
                request fails (with the API's own message when it provides
                one), or no stream can be extracted.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        try:
            # Both lookups are regex-based; a miss in either one means the
            # key cannot be scraped, so the hardcoded key is used instead.
            app_js_code = self._get_js_code(video_id, webpage)
            api_key = self._search_regex(
                r'embedderAuth[\s]*:[\s]*{[^}]+vendorKey:\"(?P<token>\w+?)\"}',
                app_js_code, 'token')
        except RegexNotFoundError:
            self.report_warning('Failed to extract the API key. Falling back to a hardcoded key.')
            api_key = self.BACKUP_API_KEY

        # The API request is meaningless without these, so fail early with a
        # clear message rather than posting null values.
        client_id = self._html_search_meta('clientID', webpage, fatal=True)
        # From here on the event id (not the URL slug) identifies the video.
        video_id = self._html_search_meta('eventID', webpage, fatal=True)

        headers = {'authorization': 'embedder', 'wsc-api-key': api_key}
        data = json.dumps({
            'clientID': client_id,
            'eventID': video_id,
            'showStreams': True,
        }).encode('utf8')

        try:
            api_reply = self._download_json(
                'https://api.v3.invintus.com/v2/Event/getDetailed',
                video_id, headers=headers, data=data)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError):
                self.write_debug(e.cause)
                # Non-fatal parse: a non-JSON error body must not replace the
                # original HTTP error that is re-raised below.
                error_body = self._parse_json(
                    e.cause.response.read().decode(), video_id, fatal=False)
                error_info = traverse_obj(error_body, 'errors', expected_type=dict) or {}
                if error_info.get('hasError') is True:
                    reason = error_info.get('error')
                    raise ExtractorError(f'{self.IE_NAME} said: {reason}', expected=True)
            raise

        # Tolerate a reply without a usable ``data`` object; the missing
        # stream URL is then reported by _extract_formats.
        response = traverse_obj(api_reply, 'data', expected_type=dict) or {}

        stream_url = traverse_obj(response, ('streamingURIs', 'main'))
        formats = self._extract_formats(response, video_id, stream_url)

        return {
            'id': video_id,
            'title': response.get('title') or self._og_search_title(webpage),
            'description': self._get_description(response) or self._og_search_description(webpage),
            'formats': formats,
            'thumbnail': response.get('videoThumbnail'),
            'subtitles': self.extract_subtitles(response),
            'timestamp': unified_timestamp(response.get('startDateTime')),
            'location': response.get('locationName'),
        }