from .common import InfoExtractor

# Local-debugging switch. When enabled, optional third-party helpers are
# imported and the extractor dumps intermediate data (see _real_extract).
DEBUG_P = False
if DEBUG_P:
    import json

    from icecream import ic
    from IPython import embed


def gen_dict_extract(var, key):
    """Yield every value stored under *key* anywhere inside *var*.

    *var* may be a mapping, a list, or any nesting of the two; anything
    else yields nothing. Traversal is depth-first in container order, and a
    matching value is yielded before its own contents are searched.
    """
    if isinstance(var, list):
        # Lists are walked at every depth, so mappings inside nested lists
        # (or a list passed in directly) are searched as well.
        for item in var:
            yield from gen_dict_extract(item, key)
    elif hasattr(var, "items"):
        for k, v in var.items():
            if k == key:
                yield v
            if isinstance(v, (dict, list)):
                yield from gen_dict_extract(v, key)


class UnderlineIE(InfoExtractor):
    """Extractor for event pages on underline.io."""

    # The video id is the whole path after /events/, without query string
    # or fragment (this matches the "id" expected by the test below).
    _VALID_URL = r"https?://(?:www\.)?underline\.io/events/(?P<id>[^?#]+)"
    _TESTS = [
        {
            "params": {
                "skip_download": True,
            },
            "url": "https://underline.io/events/342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter?tab=video",
            # md5 sum of the first 10241 bytes of the video file (use --test)
            # NOTE(review): a top-level "md5" is normally a bare hex digest
            # without the "md5:" prefix; it is not checked while
            # skip_download is set -- confirm before enabling downloads.
            "md5": "md5:eaa894161adaef6efd6008681e1cd2c5",
            "info_dict": {
                "id": "342/posters/12863/poster/66463-mbti-personality-prediction-approach-on-persian-twitter",
                "ext": "mp4",
                "title": "MBTI Personality Prediction Approach on Persian Twitter",
            },
        }
    ]

    @staticmethod
    def _first_string(data, key):
        """Return the first non-empty string stored under *key* in *data*, or None."""
        return next(
            (
                value
                for value in gen_dict_extract(data, key)
                if isinstance(value, str) and value
            ),
            None,
        )

    def _real_extract(self, url):
        """Download the event page and build the info dict from its embedded JSON.

        Returns a dict with "id", "title" (None when no title is found) and
        "formats" (empty when no playlist URL is found).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The previous start/end patterns passed to _search_json were empty
        # strings, which cannot locate the page data.
        # NOTE(review): the data is assumed to live in the Next.js
        # __NEXT_DATA__ blob (earlier debug notes indexed into
        # props.pageProps) -- confirm against a live page.
        page_data = self._search_nextjs_data(webpage, video_id)

        if DEBUG_P:
            with open("./tmp.json", "w") as f:
                json.dump(page_data, f)

        # Both fields are taken from the first matching key found anywhere
        # in the page data.
        title = self._first_string(page_data, "title")
        m3u8_url = self._first_string(page_data, "playlist")

        if DEBUG_P:
            ic(title)
            ic(m3u8_url)

        formats = []
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, ext="mp4", entry_protocol="m3u8_native"))

        return {
            "id": video_id,
            "title": title,
            "formats": formats,
        }