2018-04-24 11:02:38 -05:00
from . common import InfoExtractor
2023-07-09 02:53:02 -05:00
from . . networking . exceptions import HTTPError
2018-04-24 11:02:38 -05:00
from . . utils import (
2018-07-22 08:25:46 -05:00
ExtractorError ,
2018-04-24 11:02:38 -05:00
float_or_none ,
2024-05-26 14:27:21 -05:00
int_or_none ,
2018-07-22 08:25:46 -05:00
parse_resolution ,
2018-04-24 11:02:38 -05:00
str_or_none ,
try_get ,
2018-07-22 08:25:46 -05:00
unified_timestamp ,
url_or_none ,
urljoin ,
2018-04-24 11:02:38 -05:00
)
class PuhuTVIE(InfoExtractor):
    """Extractor for single puhutv.com watch pages (URLs ending in ``-izle``)."""

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
    IE_NAME = 'puhutv'
    _TESTS = [{
        # film
        'url': 'https://puhutv.com/bi-kucuk-eylul-meselesi-izle',
        'md5': '4de98170ccb84c05779b1f046b3c86f8',
        'info_dict': {
            'id': '11909',
            'display_id': 'bi-kucuk-eylul-meselesi',
            'ext': 'mp4',
            'title': 'Bi Küçük Eylül Meselesi',
            'description': "Geçirdiği kazadan sonra son bir ayını hatırlamayan Eylül, bu süre içinde Bozcaada'da olduğunu öğrenir ve gerçeklerin peşine düşmek için Bozcaada'ya gider ve onu tanıyan Tekin adlı çizerle yakınlaşır.",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 6176.96,
            'creator': 'Ay Yapım',
            'creators': ['Ay Yapım'],
            'timestamp': 1561062749,
            'upload_date': '20190620',
            'release_year': 2014,
            'view_count': int,
            'tags': list,
        },
    }, {
        # episode, geo restricted, bypassable with --geo-verification-proxy
        'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
        'only_matching': True,
    }, {
        # 4k, with subtitles
        'url': 'https://puhutv.com/dip-1-bolum-izle',
        'only_matching': True,
    }]
    # Maps the subtitle language labels found in the API response to language
    # codes; labels not listed here are used unchanged as the subtitle key.
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
        'عربى': 'ar',
    }

    def _build_formats(self, videos, video_id):
        """Turn the ``videos`` list of the assets API response into format dicts.

        Entries without a valid URL, with an already seen URL, or whose
        ``video_format`` is neither HLS nor ``mp4`` are skipped.
        """
        seen_urls = set()
        formats = []
        for video in videos:
            media_url = url_or_none(video.get('url'))
            if not media_url or media_url in seen_urls:
                continue
            seen_urls.add(media_url)

            playlist = video.get('is_playlist')
            # Master playlists are expanded into their variants; a failure to
            # fetch one is non-fatal so the remaining entries are still used.
            if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue

            quality = int_or_none(video.get('quality'))
            f = {
                'url': media_url,
                'ext': 'mp4',
                'height': quality,
            }
            video_format = video.get('video_format')
            # A single-rendition HLS stream (explicitly not a playlist)
            is_hls = (
                video_format == 'hls'
                or '/hls/' in media_url
                or '/chunklist.m3u8' in media_url
            ) and playlist is False
            if is_hls:
                format_id = 'hls'
                f['protocol'] = 'm3u8_native'
            elif video_format == 'mp4':
                format_id = 'http'
            else:
                continue
            if quality:
                format_id += f'-{quality}p'
            f['format_id'] = format_id
            formats.append(f)
        return formats

    def _build_thumbnails(self, content):
        """Collect thumbnails from ``content['images']['wide']`` (id -> URL mapping)."""
        images = try_get(
            content, lambda x: x['images']['wide'], dict) or {}
        thumbnails = []
        for image_id, image_url in images.items():
            if not isinstance(image_url, str):
                continue
            # Values without a scheme or a leading '//' get an https:// prefix
            if not image_url.startswith(('http', '//')):
                image_url = f'https://{image_url}'
            # The image id is passed to parse_resolution() for width/height hints
            t = parse_resolution(image_id)
            t.update({
                'id': image_id,
                'url': image_url,
            })
            thumbnails.append(t)
        return thumbnails

    def _build_subtitles(self, content):
        """Collect subtitle tracks from ``content['subtitles']`` keyed by language."""
        subtitles = {}
        for subtitle in content.get('subtitles') or []:
            if not isinstance(subtitle, dict):
                continue
            lang = subtitle.get('language')
            sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
            if not lang or not isinstance(lang, str) or not sub_url:
                continue
            # Append rather than assign so that several tracks sharing one
            # language do not overwrite each other.
            subtitles.setdefault(self._SUBTITLE_LANGS.get(lang, lang), []).append({
                'url': sub_url,
            })
        return subtitles

    def _real_extract(self, url):
        """Fetch the slug metadata and the asset's video list and build the info dict.

        Raises a geo restriction error when the video list request is answered
        with HTTP 403; any other ExtractorError is re-raised unchanged.
        """
        display_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, f'/api/slug/{display_id}-izle'),
            display_id)['data']

        video_id = str(info['id'])
        # 'title' holds the parent show/film object, 'name' the item's own name
        show = info.get('title') or {}
        title = info.get('name') or show['name']
        if info.get('display_name'):
            title = f'{title} {info["display_name"]}'

        try:
            videos = self._download_json(
                f'https://puhutv.com/api/assets/{video_id}/videos',
                display_id, 'Downloading video JSON',
                headers=self.geo_verification_headers())
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                self.raise_geo_restricted()
            raise

        formats = self._build_formats(videos['data']['videos'], video_id)

        creator = try_get(
            show, lambda x: x['producer']['name'], str)

        content = info.get('content') or {}

        tags = [
            genre['name']
            for genre in show.get('genres') or []
            if isinstance(genre, dict)
            and genre.get('name') and isinstance(genre['name'], str)
        ]

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': info.get('description') or show.get('description'),
            'season_id': str_or_none(info.get('season_id')),
            'season_number': int_or_none(info.get('season_number')),
            'episode_number': int_or_none(info.get('episode_number')),
            'release_year': int_or_none(show.get('released_at')),
            'timestamp': unified_timestamp(info.get('created_at')),
            'creator': creator,
            'view_count': int_or_none(content.get('watch_count')),
            # duration_in_ms is scaled by 1000 to get seconds
            'duration': float_or_none(content.get('duration_in_ms'), 1000),
            'tags': tags,
            'subtitles': self._build_subtitles(content),
            'thumbnails': self._build_thumbnails(content),
            'formats': formats,
        }
class PuhuTVSerieIE(InfoExtractor):
    """Extractor for puhutv.com detail pages (URLs ending in ``-detay``)."""

    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
    IE_NAME = 'puhutv:serie'
    _TESTS = [{
        'url': 'https://puhutv.com/deniz-yildizi-detay',
        'info_dict': {
            'title': 'Deniz Yıldızı',
            'id': 'deniz-yildizi',
        },
        'playlist_mincount': 205,
    }, {
        # a film detail page which is using same url with serie page
        'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
        'only_matching': True,
    }]

    def _extract_entries(self, seasons):
        """Yield a ``url_result`` for each episode of each season.

        Episodes are requested page by page (``per`` items per request) for as
        long as the response reports ``has_more`` as ``True``. Seasons without
        an id and episodes without a slug are skipped.
        """
        per = 100
        for season in seasons:
            season_id = season.get('id')
            if not season_id:
                continue
            page = 1
            has_more = True
            while has_more is True:
                # Kept in its own name so the loop variable is not overwritten
                season_page = self._download_json(
                    f'https://appservice.puhutv.com/api/seasons/{season_id}/episodes?v=2',
                    season_id, f'Downloading episode {(page - 1) * per}-{page * per} metadata', query={
                        'page': page,
                        'per': per,
                    })['data']
                episodes = season_page.get('episodes')
                if not episodes:
                    # An empty page ends pagination even if has_more is still
                    # reported as true, so the loop cannot spin forever.
                    break
                for episode in episodes:
                    # try_get keeps a missing/empty 'assets' list from raising,
                    # so the slug check below can actually skip the episode.
                    slug = episode.get('slug') or try_get(
                        episode, lambda x: x['assets'][0]['slug'], str)
                    if not slug:
                        continue
                    # id and name are only hints for the playlist entry, so a
                    # missing one must not abort the whole playlist.
                    yield self.url_result(
                        f'https://puhutv.com/{slug}',
                        ie=PuhuTVIE.ie_key(), video_id=episode.get('id'),
                        video_title=episode.get('name'))
                page += 1
                has_more = season_page.get('has_more')

    def _real_extract(self, url):
        """Return a playlist of episodes, or a single result for a film page."""
        playlist_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, f'/api/slug/{playlist_id}-detay'),
            playlist_id)['data']

        seasons = info.get('seasons')
        if seasons:
            return self.playlist_result(
                self._extract_entries(seasons), playlist_id, info.get('name'))

        # For films, these are using same url with series
        video_id = info.get('slug') or info['assets'][0]['slug']
        return self.url_result(
            f'https://puhutv.com/{video_id}-izle',
            PuhuTVIE.ie_key(), video_id)