# Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-03-09 12:50:23 -05:00)
# File: yt-dlp/yt_dlp/extractor/softwhiteunderbelly.py (Python, 130 lines, 5.3 KiB)

from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
ExtractorError,
clean_html,
get_element_by_class,
get_element_by_id,
unified_strdate,
urlencode_postdata,
)
class SoftWhiteUnderbellyIE(InfoExtractor):
    """Extractor for video pages on softwhiteunderbelly.com.

    The watch page is scraped for a VHX embed URL and the result is handed to
    ``VHXEmbedIE`` as a ``url_transparent`` entry. If no session cookie is
    present, or the page marks the viewer as unauthorized, a login with the
    configured credentials is attempted before the page is fetched again.
    """

    _LOGIN_URL = 'https://www.softwhiteunderbelly.com/login'
    _NETRC_MACHINE = 'softwhiteunderbelly'
    # NOTE(review): `.+` also captures any trailing path or query string into
    # the display id - confirm this is intended.
    _VALID_URL = r'https?://(?:www\.)?softwhiteunderbelly\.com/videos/(?P<id>.+)'
    _TESTS = [
        {
            'url': 'https://www.softwhiteunderbelly.com/videos/kenneth-final1',
            'note': 'A single Soft White Underbelly Episode',
            'md5': '8e79f29ec1f1bda6da2e0b998fcbebb8',
            'info_dict': {
                'id': '3201266',
                'ext': 'mp4',
                'display_id': 'kenneth-final1',
                'title': 'Appalachian Man interview-Kenneth',
                'description': 'Soft White Underbelly interview and portrait of Kenneth, an Appalachian man in Clay County, Kentucky.',
                'thumbnail': 'https://vhx.imgix.net/softwhiteunderbelly/assets/249f6db0-2b39-49a4-979b-f8dad4681825.jpg',
                'uploader_url': 'https://vimeo.com/user80538407',
                'uploader': 'OTT Videos',
                'uploader_id': 'user80538407',
                'duration': 512,
            },
            'expected_warnings': ['Failed to parse XML: not well-formed'],
        },
        {
            'url': 'https://www.softwhiteunderbelly.com/videos/tj-2-final-2160p',
            'note': 'A single Soft White Underbelly Episode',
            'md5': '286bd8851b4824c62afb369e6f307036',
            'info_dict': {
                'id': '3506029',
                'ext': 'mp4',
                'display_id': 'tj-2-final-2160p',
                'title': 'Fentanyl Addict interview-TJ (follow up)',
                'description': 'Soft White Underbelly follow up interview and portrait of TJ, a fentanyl addict on Skid Row.',
                'thumbnail': 'https://vhx.imgix.net/softwhiteunderbelly/assets/c883d531-5da0-4faf-a2e2-8eba97e5adfc.jpg',
                'duration': 817,
                'uploader': 'OTT Videos',
                'uploader_url': 'https://vimeo.com/user80538407',
                'uploader_id': 'user80538407',
            },
            'expected_warnings': ['Failed to parse XML: not well-formed'],
        },
    ]

    def _get_authenticity_token(self, display_id):
        """Download the login page and return its ``authenticity_token`` form value."""
        signin_page = self._download_webpage(
            self._LOGIN_URL, display_id, note='Getting authenticity token')
        return self._html_search_regex(
            r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']',
            signin_page, 'authenticity_token')

    def _login(self, display_id):
        """Try to log in with the configured credentials.

        Returns:
            ``True`` when no username is configured (no login attempted),
            ``None`` when the response reports an active subscription,
            otherwise a string describing why the login is not usable.
        """
        username, password = self._get_login_info()
        if not username:
            return True

        response = self._download_webpage(
            self._LOGIN_URL,
            display_id,
            note='Logging in',
            fatal=False,
            data=urlencode_postdata({
                'email': username,
                'password': password,
                'authenticity_token': self._get_authenticity_token(display_id),
                'utf8': True,
            }),
        )
        # The request is non-fatal, so a failed download does not raise and the
        # result is not a page to search; report it instead of feeding a
        # non-string into the regex search below.
        if not response:
            return 'Login request failed'

        subscription_status = self._search_regex(
            r'user_has_subscription:\s*["\'](.+?)["\']',
            response, 'subscription status', default='none').lower()
        if subscription_status == 'true':
            return None
        if subscription_status == 'false':
            return 'Account is not subscribed'
        # Marker missing or carrying an unexpected value: treated as bad credentials.
        return 'Incorrect username/password'

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the page's VHX embed.

        Raises:
            ExtractorError: when the page is still unauthorized after the
                login attempt (login-required error if no credentials were
                given, otherwise the message returned by ``_login``).
        """
        display_id = self._match_id(url)
        unauthorized_marker = '<div id="watch-unauthorized"'

        webpage = None
        # Only try the page up front when a session cookie is already present.
        if self._get_cookies('https://www.softwhiteunderbelly.com').get('_session'):
            webpage = self._download_webpage(url, display_id)

        if not webpage or unauthorized_marker in webpage:
            login_err = self._login(display_id)
            webpage = self._download_webpage(url, display_id)
            if login_err and unauthorized_marker in webpage:
                if login_err is True:
                    # No credentials were supplied at all.
                    self.raise_login_required(method='any')
                raise ExtractorError(login_err, expected=True)

        embed_url = self._html_search_regex(
            r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
        thumbnail = self._og_search_thumbnail(webpage)
        watch_info = get_element_by_id('watch-info', webpage) or ''
        title = clean_html(get_element_by_class('video-title', watch_info))
        release_date = self._search_regex(
            r'data-meta-field-name=["\']release_dates["\'] data-meta-field-value=["\'](.+?)["\']',
            watch_info, 'release date', default=None)

        return {
            '_type': 'url_transparent',
            'ie_key': VHXEmbedIE.ie_key(),
            'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.softwhiteunderbelly.com'),
            'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
            'display_id': display_id,
            'title': title,
            'description': self._html_search_meta('description', webpage, fatal=False),
            'thumbnail': thumbnail.split('?')[0] if thumbnail else None,  # Ignore crop/downscale
            'release_date': unified_strdate(release_date),
        }