from .wistia import WistiaIE from ..utils import ( clean_html, get_elements_html_by_class ) class ThirtyDaySingerBase(WistiaIE): def _extract_for_url(self, url): lesson_index = self._match_id(url) webpage = self._download_webpage(url, lesson_index) match = next(self._extract_wistia_async_embed(webpage)) embed_config = self._download_embed_config('medias', match.group('id'), url) embed_infojson = self._extract_media(embed_config) webpage_infojson = self._extract_webpage_data(webpage) return {**embed_infojson, **webpage_infojson} def _extract_webpage_data(self, webpage): title = self._html_search_regex(r'

([^<]+)

', webpage, 'title') fallback_title = self._html_extract_title(webpage) description = self._html_search_meta('description', webpage, fatal=False) return { 'title': title or fallback_title, 'description': clean_html(self._format_html_list(description)) } # The site makes extensive use of HTML lists for formatting and `clean_html` # doesn't handle them well. This is needed to keep lists readable. def _format_html_list(self, html): replacements = { '