2024-01-18 20:51:53 -06:00
from . common import InfoExtractor
from . . utils import (
ExtractorError ,
float_or_none ,
int_or_none ,
url_or_none ,
)
from . . utils . traversal import traverse_obj
2025-02-04 04:38:59 -06:00
# URL template for the Il Post podcast API; ``%s`` is replaced with the
# podcast slug. The template must not contain any whitespace, otherwise the
# formatted URL is invalid.
# NOTE(review): ``hits=20`` presumably caps how many episodes one request
# returns, so longer podcasts may be truncated -- confirm against the API.
PODCAST_API = 'https://api-prod.ilpost.it/podcast/v1/podcast/%s?hits=20'
2024-01-18 20:51:53 -06:00
2025-02-04 04:48:49 -06:00
2024-01-18 20:51:53 -06:00
class IlPostIE(InfoExtractor):
    """Extract a single Il Post podcast episode from its web page.

    The episode metadata is read from the JSON object embedded in the page
    right after ``{"props":{"pageProps":``; no additional request is made.
    """

    _VALID_URL = r'https?://(?:www\.)?ilpost\.it/podcasts/(?:.*?)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.ilpost.it/podcasts/l-invasione/1-avis-akvasas-ka/',
        'md5': '43649f002d85e1c2f319bb478d479c40',
        'info_dict': {
            'id': '2972047',
            'ext': 'mp3',
            'display_id': '1-avis-akvasas-ka',
            'title': '1. Avis akvasas ka',
            'url': 'https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3',
            'timestamp': 1703835014,
            'upload_date': '20231229',
            'description': '<p>Circa tre miliardi di persone, oggi, parlano lingue che hanno un’unica antenata: dall’italiano all’inglese, passando per il farsi e l’islandese, queste lingue discendono tutte da una lingua arrivata in Europa circa cinquemila anni fa, insieme a un gruppo di persone ben preciso.<br />\nCon la loro lingua queste persone si portarono dietro anche alcuni oggetti, miti e leggende, e una certa visione della società, lasciando tracce indelebili ancora oggi.</p>\n<p>Per approfondire gli argomenti trattati nel podcast abbiamo raccolto in <a href="https://www.ilpost.it/2024/01/05/invasione-testi/?homepagePosition=3">questa pagina</a> le cose da leggere e da guardare dopo aver ascoltato le puntate.</p>\n',
            'duration': 2495.0,
            'availability': 'public',
            'series_id': '235598',
            'thumbnail': 'https://www.ilpost.it/wp-content/uploads/2023/12/22/1703238848-copertina500x500.jpg',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the episode page at *url*.

        Raises:
            ExtractorError: if the embedded metadata holds no episode entry
                or the entry has no ``id``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        endpoint_metadata = self._search_json(
            r'{"props":{"pageProps":', webpage, 'metadata', display_id)

        # Walk the nested structure once and tolerate missing levels, so an
        # unexpected page layout ends in ExtractorError below instead of a
        # bare KeyError/IndexError.
        episode = traverse_obj(endpoint_metadata, ('data', 'data', 'episode', 'data', 0))
        episode_id = traverse_obj(episode, 'id')
        if not episode or episode_id is None:
            raise ExtractorError('Episode could not be extracted')

        podcast_id = traverse_obj(episode, ('parent', 'id'))

        return {
            'id': str(episode_id),
            'display_id': display_id,
            # Avoid emitting the literal string 'None' when the parent
            # podcast id is absent.
            'series_id': str(podcast_id) if podcast_id is not None else None,
            **traverse_obj(episode, {
                'title': ('title', {str}),
                'url': ('episode_raw_url', {url_or_none}),
                'thumbnail': ('image', {url_or_none}),
                'description': ('content_html', {str}),
                'timestamp': ('timestamp', {int_or_none}),
                # The source field is in milliseconds; scale to seconds.
                'duration': ('milliseconds', {float_or_none(scale=1000)}),
                # NOTE(review): any truthy access_level is reported as
                # 'public' -- confirm this matches the API's semantics.
                'availability': ('access_level', {lambda v: 'public' if v else 'subscriber_only'}),
            }),
        }
2025-02-04 04:38:59 -06:00
2025-02-04 04:48:49 -06:00
2025-02-04 04:38:59 -06:00
class IlPostPodcastIE(InfoExtractor):
    """Extract an Il Post podcast landing page as a playlist of episodes.

    The episode list is fetched from the JSON endpoint described by
    ``PODCAST_API``, formatted with the podcast slug taken from the URL.
    """

    _VALID_URL = r'https?://(?:www\.)?ilpost\.it/podcasts/(?P<id>[a-zA-Z0-9\-]+)/?$'
    _TESTS = [{
        'url': 'https://www.ilpost.it/podcasts/morning/',
        'info_dict': {
            'id': 'morning',
            'display_id': 'morning',
            'title': 'Morning',
            'series': 'Morning',
            'season_number': 1,
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://www.ilpost.it/podcasts/basaglia-e-i-suoi/',
        'info_dict': {
            'id': 'basaglia-e-i-suoi',
            'display_id': 'basaglia-e-i-suoi',
            'title': 'Basaglia e i suoi',
            'series': 'Basaglia e i suoi',
            'season_number': 1,
        },
        'playlist_mincount': 5,
    }]

    def _real_extract(self, url):
        """Return a playlist info dict for the podcast page at *url*.

        Items without a usable ``url`` are skipped; a response with no
        ``data`` list yields an empty playlist rather than a TypeError.
        """
        display_id = self._match_id(url)
        data = self._download_json(PODCAST_API % display_id, display_id)

        # The series title is read from the first item's parent object;
        # look it up once instead of once per episode.
        series = traverse_obj(data, ('data', 0, 'parent', 'title'))

        entries = [{
            '_type': 'url',
            'url': episode['url'],
            'series': series,
            'season_number': 1,
            **traverse_obj(episode, {
                'title': ('title', {str}),
                'description': ('content_html', {str}),
                # NOTE(review): this is filled from the API item's `id`,
                # which may be a database id rather than an ordinal
                # episode number -- confirm.
                'episode_number': ('id', {int_or_none}),
            }),
        } for episode in traverse_obj(data, ('data', lambda _, v: url_or_none(v['url'])))]

        return {
            '_type': 'playlist',
            'id': display_id,
            'display_id': display_id,
            'title': series,
            'series': series,
            'entries': entries,
            'season_number': 1,
        }