2024-02-24 18:16:34 -06:00
import datetime as dt
2022-03-02 11:08:13 -06:00
import functools
2022-03-17 07:22:14 -05:00
import itertools
2021-08-27 20:37:13 -05:00
import json
import re
2022-03-17 07:22:14 -05:00
import time
2024-03-31 19:01:09 -05:00
import urllib . parse
2023-05-29 04:35:10 -05:00
2021-08-27 20:37:13 -05:00
from . common import InfoExtractor , SearchInfoExtractor
2023-11-20 02:04:04 -06:00
from . . networking import Request
2023-07-09 02:53:02 -05:00
from . . networking . exceptions import HTTPError
2014-12-13 05:24:42 -06:00
from . . utils import (
2014-11-05 12:52:34 -06:00
ExtractorError ,
2022-03-17 07:22:14 -05:00
OnDemandPagedList ,
clean_html ,
2025-02-20 09:39:06 -06:00
determine_ext ,
2021-02-10 00:45:20 -06:00
float_or_none ,
2021-08-07 06:20:46 -05:00
int_or_none ,
2022-03-17 07:22:14 -05:00
join_nonempty ,
2014-12-13 05:24:42 -06:00
parse_duration ,
2015-04-29 11:24:24 -05:00
parse_iso8601 ,
2025-02-20 09:39:06 -06:00
parse_qs ,
2022-04-01 05:31:58 -05:00
parse_resolution ,
qualities ,
2017-08-06 11:19:46 -05:00
remove_start ,
2022-04-01 05:31:58 -05:00
str_or_none ,
2022-03-02 11:08:13 -06:00
traverse_obj ,
2017-08-06 10:07:06 -05:00
try_get ,
2022-03-02 11:08:13 -06:00
unescapeHTML ,
2022-03-17 07:22:14 -05:00
update_url_query ,
url_or_none ,
2016-03-25 15:19:24 -05:00
urlencode_postdata ,
2022-04-27 11:44:29 -05:00
urljoin ,
2013-11-23 03:19:44 -06:00
)
2013-11-23 23:37:14 -06:00
2013-11-23 03:19:44 -06:00
class NiconicoIE(InfoExtractor):
    """Extractor for single videos on nicovideo.jp (watch pages and nico.ms short links)."""

    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'
    _GEO_COUNTRIES = ['JP']
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'thumbnail': r're:https?://.*',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': int,  # timestamp is unstable
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
            'view_count': int,
            'comment_count': int,
            'genres': ['未設定'],
            'tags': [],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
            'ext': 'mp4',
            'title': '【Kagamine Rin】Dance on media【Original】take2!',
            'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
            'thumbnail': r're:https?://.*',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
            'duration': 208.0,
            'comment_count': int,
            'view_count': int,
            'genres': ['音楽・サウンド'],
            'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # 'video exists but is marked as "deleted"
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'id': 'sm10000',
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'thumbnail': r're:https?://.*',
            'upload_date': '20071224',
            'timestamp': int,  # timestamp field has different value if logged in
            'duration': 304,
            'view_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'id': '1388129933',
            'ext': 'mp4',
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'thumbnail': r're:https?://.*',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
            'uploader_id': '312',
        },
        'skip': 'The viewing period of the video you were searching for has expired.',
    }, {
        # video not available via `getflv`; "old" HTML5 video
        'url': 'http://www.nicovideo.jp/watch/sm1151009',
        'info_dict': {
            'id': 'sm1151009',
            'ext': 'mp4',
            'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
            'description': 'md5:f95a3d259172667b293530cc2e41ebda',
            'thumbnail': r're:https?://.*',
            'duration': 184,
            'timestamp': 1190835883,
            'upload_date': '20070926',
            'uploader': 'denden2',
            'uploader_id': '1392194',
            'view_count': int,
            'comment_count': int,
            'genres': ['ゲーム'],
            'tags': [],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # "New" HTML5 video
        'url': 'http://www.nicovideo.jp/watch/sm31464864',
        'info_dict': {
            'id': 'sm31464864',
            'ext': 'mp4',
            'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
            'timestamp': 1498481660,
            'upload_date': '20170626',
            'uploader': 'no-namamae',
            'uploader_id': '40826363',
            'thumbnail': r're:https?://.*',
            'duration': 198,
            'view_count': int,
            'comment_count': int,
            'genres': ['アニメ'],
            'tags': [],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Video without owner
        'url': 'http://www.nicovideo.jp/watch/sm18238488',
        'info_dict': {
            'id': 'sm18238488',
            'ext': 'mp4',
            'title': '【実写版】ミュータントタートルズ',
            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
            'timestamp': 1341128008,
            'upload_date': '20120701',
            'thumbnail': r're:https?://.*',
            'duration': 5271,
            'view_count': int,
            'comment_count': int,
            'genres': ['エンターテイメント'],
            'tags': [],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
        'only_matching': True,
    }, {
        'note': 'a video that is only served as an ENCRYPTED HLS.',
        'url': 'https://www.nicovideo.jp/watch/so38016254',
        'only_matching': True,
    }]

    _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'
    # Headers sent with the nvapi "watch" permission request (see ping() below)
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
        'X-Niconico-Language': 'en-us',
        'Referer': 'https://www.nicovideo.jp/',
        'Origin': 'https://www.nicovideo.jp',
    }

    def _perform_login(self, username, password):
        """Log in with the given credentials, handling the one-time-password step if presented.

        Returns True when the final page carries no error notice, False otherwise.
        Failures are reported as warnings rather than raised.
        """
        login_form_strs = {
            'mail_tel': username,
            'password': password,
        }
        # Visit the login page first so that a login session is established
        self._request_webpage(
            'https://account.nicovideo.jp/login', None,
            note='Acquiring Login session')
        page = self._download_webpage(
            'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
            note='Logging in', errnote='Unable to log in',
            data=urlencode_postdata(login_form_strs),
            headers={
                'Referer': 'https://account.nicovideo.jp/login',
                'Content-Type': 'application/x-www-form-urlencoded',
            })
        if 'oneTimePw' in page:
            # The response is an MFA form; submit the 2FA code to its action URL
            post_url = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
            page = self._download_webpage(
                urljoin('https://account.nicovideo.jp', post_url), None,
                note='Performing MFA', errnote='Unable to complete MFA',
                data=urlencode_postdata({
                    'otp': self._get_tfa_info('6 digits code'),
                }), headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                })
            if 'oneTimePw' in page or 'formError' in page:
                err_msg = self._html_search_regex(
                    r'formError["\']+>(.*?)</div>', page, 'form_error',
                    default='There\'s an error but the message can\'t be parsed.',
                    flags=re.DOTALL)
                self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
                return False
        login_ok = 'class="notice error"' not in page
        if not login_ok:
            self.report_warning('Unable to log in: bad username or password')
        return login_ok

    def _get_heartbeat_info(self, info_dict):
        """Open a DMC session for a ``niconico_dmc`` format and build its heartbeat data.

        ``info_dict['url']`` must have the form ``niconico_dmc:<video_id>/<video_src_id>/<audio_src_id>``
        as produced by `_extract_format_for_quality`. The dict is updated in place with the
        real content URL and protocol.

        Returns a tuple ``(info_dict, heartbeat_info_dict)``.
        Raises ExtractorError for an unsupported protocol or missing session data.
        """
        video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
        dmc_protocol = info_dict['expected_protocol']

        # Reuse the API data stored by _real_extract when present; otherwise re-fetch the watch page
        api_data = (
            info_dict.get('_api_data')
            or self._parse_json(
                self._html_search_regex(
                    'data-api-data="([^"]+)"',
                    self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
                    'API data', default='{}'),
                video_id))

        session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
        session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
        if not session_api_endpoint:
            # Without this, the subscripting below fails with an opaque TypeError
            raise ExtractorError('Unable to find the DMC session API endpoint in the API data')

        def ping():
            tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
            if tracking_id:
                tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
                watch_request_response = self._download_json(
                    tracking_url, video_id,
                    note='Acquiring permission for downloading video', fatal=False,
                    headers=self._API_HEADERS)
                if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
                    self.report_warning('Failed to acquire permission for playing video. Video download may fail.')

        yesno = lambda x: 'yes' if x else 'no'

        if dmc_protocol == 'http':
            protocol = 'http'
            protocol_parameters = {
                'http_output_download_parameters': {
                    'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                    'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
                },
            }
        elif dmc_protocol == 'hls':
            protocol = 'm3u8'
            segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
            parsed_token = self._parse_json(session_api_data['token'], video_id)
            encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
            protocol_parameters = {
                'hls_parameters': {
                    'segment_duration': segment_duration,
                    'transfer_preset': '',
                    'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                    'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
                },
            }
            if 'hls_encryption' in parsed_token and encryption:
                protocol_parameters['hls_parameters']['encryption'] = {
                    parsed_token['hls_encryption']: {
                        'encrypted_key': encryption['encryptedKey'],
                        'key_uri': encryption['keyUri'],
                    },
                }
            else:
                protocol = 'm3u8_native'
        else:
            raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')

        session_response = self._download_json(
            session_api_endpoint['url'], video_id,
            query={'_format': 'json'},
            headers={'Content-Type': 'application/json'},
            note='Downloading JSON metadata for {}'.format(info_dict['format_id']),
            data=json.dumps({
                'session': {
                    'client_info': {
                        'player_id': session_api_data.get('playerId'),
                    },
                    'content_auth': {
                        'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
                        'content_key_timeout': session_api_data.get('contentKeyTimeout'),
                        'service_id': 'nicovideo',
                        'service_user_id': session_api_data.get('serviceUserId'),
                    },
                    'content_id': session_api_data.get('contentId'),
                    'content_src_id_sets': [{
                        'content_src_ids': [{
                            'src_id_to_mux': {
                                'audio_src_ids': [audio_src_id],
                                'video_src_ids': [video_src_id],
                            },
                        }],
                    }],
                    'content_type': 'movie',
                    'content_uri': '',
                    'keep_method': {
                        'heartbeat': {
                            'lifetime': session_api_data.get('heartbeatLifetime'),
                        },
                    },
                    'priority': session_api_data['priority'],
                    'protocol': {
                        'name': 'http',
                        'parameters': {
                            'http_parameters': {
                                'parameters': protocol_parameters,
                            },
                        },
                    },
                    'recipe_id': session_api_data.get('recipeId'),
                    'session_operation_auth': {
                        'session_operation_auth_by_signature': {
                            'signature': session_api_data.get('signature'),
                            'token': session_api_data.get('token'),
                        },
                    },
                    'timing_constraint': 'unlimited',
                },
            }).encode())

        info_dict['url'] = session_response['data']['session']['content_uri']
        info_dict['protocol'] = protocol

        # get heartbeat info
        heartbeat_info_dict = {
            'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
            'data': json.dumps(session_response['data']),
            # interval: heartbeatLifetime divided by 3000, i.e. (assuming the lifetime
            # is in milliseconds) one third of the lifetime in seconds, to leave a buffer.
            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
            'ping': ping,
        }

        return info_dict, heartbeat_info_dict

    def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
        """Build one DMC format dict for an audio/video quality pair.

        Returns None when either quality is not marked ``isAvailable``.
        """
        if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
            return None

        format_id = '-'.join(
            [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])

        vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))

        return {
            # Placeholder URL; resolved to the real one by _get_heartbeat_info
            'url': 'niconico_dmc:{}/{}/{}'.format(video_id, video_quality['id'], audio_quality['id']),
            'format_id': format_id,
            'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
            'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
            'acodec': 'aac',
            'vcodec': 'h264',
            **traverse_obj(audio_quality, ('metadata', {
                'abr': ('bitrate', {float_or_none(scale=1000)}),
                'asr': ('samplingRate', {int_or_none}),
            })),
            **traverse_obj(video_quality, ('metadata', {
                'vbr': ('bitrate', {float_or_none(scale=1000)}),
                'height': ('resolution', 'height', {int_or_none}),
                'width': ('resolution', 'width', {int_or_none}),
            })),
            'quality': -2 if 'low' in video_quality['id'] else None,
            'protocol': 'niconico_dmc',
            'expected_protocol': dmc_protocol,  # XXX: This is not a documented field
            'http_headers': {
                'Origin': 'https://www.nicovideo.jp',
                'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
            },
        }

    def _yield_dmc_formats(self, api_data, video_id):
        """Yield a DMC format for every available audio x video x protocol combination."""
        dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
        audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
        videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
        protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
        if not all((audios, videos, protocols)):
            return

        for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
            if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
                yield fmt

    def _yield_dms_formats(self, api_data, video_id):
        """Yield audio-only and video formats from the ``media.domand`` (DMS) section of the API data.

        Yields nothing when videos, audios, the access-right key or the watch track ID is missing.
        """
        fmt_filter = lambda _, v: v['isAvailable'] and v['id']
        videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
        audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
        access_key = traverse_obj(api_data, ('media', 'domand', 'accessRightKey', {str}))
        track_id = traverse_obj(api_data, ('client', 'watchTrackId', {str}))
        if not all((videos, audios, access_key, track_id)):
            return

        dms_m3u8_url = self._download_json(
            f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id,
            data=json.dumps({
                'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))),
            }).encode(), query={'actionTrackId': track_id}, headers={
                'x-access-right-key': access_key,
                'x-frontend-id': 6,
                'x-frontend-version': 0,
                'x-request-with': 'https://www.nicovideo.jp',
            })['data']['contentUrl']
        # Getting all audio formats results in duplicate video formats which we filter out later
        dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')

        # m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
        for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
            yield {
                **audio_fmt,
                **traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
                    'format_id': ('id', {str}),
                    'abr': ('bitRate', {float_or_none(scale=1000)}),
                    'asr': ('samplingRate', {int_or_none}),
                    'quality': ('qualityLevel', {int_or_none}),
                }), get_all=False),
                'acodec': 'aac',
            }

        # Sort before removing dupes to keep the format dicts with the lowest tbr
        video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
        self._remove_duplicate_formats(video_fmts)

        # Calculate the true vbr/tbr by subtracting the lowest abr
        min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000
        for video_fmt in video_fmts:
            video_fmt['tbr'] -= min_abr
            video_fmt_id = self._search_regex(
                r'https://delivery\.domand\.nicovideo\.jp/hlsbid/[a-z0-9]+/playlists/media/([a-z0-9\-]+)\.m3u8',
                video_fmt['url'], 'video format id', fatal=False)
            # Match the ID found in the playlist URL against the API's video entries
            dms_video_fmt = video_fmt_id and next(
                (item for item in videos if item.get('id') == video_fmt_id), None)
            if dms_video_fmt:
                video_fmt['format_id'] = video_fmt_id
                # int_or_none gives None (not a stray default object) when the key is absent
                video_fmt['quality'] = int_or_none(dms_video_fmt.get('qualityLevel'))
            else:
                video_fmt['format_id'] = f'video-{video_fmt["tbr"]:.0f}'
            yield video_fmt

    def _real_extract(self, url):
        """Extract metadata and formats for a single video URL.

        Reads the ``server-response`` meta tag of the watch page and falls back to the
        ``/api/watch/v3`` endpoint when the page cannot be fetched or parsed.
        """
        video_id = self._match_id(url)
        # Stays empty when the watch page itself cannot be downloaded but the API
        # fallback succeeds; the page-based lookups below then simply find nothing.
        webpage = ''

        try:
            webpage, handle = self._download_webpage_handle(
                'https://www.nicovideo.jp/watch/' + video_id, video_id)
            if video_id.startswith('so'):
                # Re-read the ID from the URL that was actually served
                video_id = self._match_id(handle.url)

            api_data = traverse_obj(
                self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
                ('data', 'response', {dict}))
            if not api_data:
                raise ExtractorError('Server response data not found')
        except ExtractorError as e:
            try:
                api_data = self._download_json(
                    f'https://www.nicovideo.jp/api/watch/v3/{video_id}?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_{round(time.time() * 1000)}', video_id,
                    note='Downloading API JSON', errnote='Unable to fetch data')['data']
            except ExtractorError:
                # Inspect the ORIGINAL failure: an HTTP error page may carry a readable reason
                if not isinstance(e.cause, HTTPError):
                    raise
                webpage = e.cause.response.read().decode('utf-8', 'replace')
                error_msg = self._html_search_regex(
                    r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
                    webpage, 'error reason', default=None)
                if not error_msg:
                    raise
                raise ExtractorError(clean_html(error_msg), expected=True)

        availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
            'needs_premium': ('isPremium', {bool}),
            'needs_subscription': ('isAdmission', {bool}),
        })) or {'needs_auth': True}))

        formats = [*self._yield_dmc_formats(api_data, video_id),
                   *self._yield_dms_formats(api_data, video_id)]
        if not formats:
            fail_msg = clean_html(self._html_search_regex(
                r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
                webpage, 'fail message', default=None, group='msg'))
            if fail_msg:
                self.to_screen(f'Niconico said: {fail_msg}')
            if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
                availability = None
                self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
            elif availability == 'premium_only':
                self.raise_login_required('This video requires premium', metadata_available=True)
            elif availability == 'subscriber_only':
                self.raise_login_required('This video is for members only', metadata_available=True)
            elif availability == 'needs_auth':
                self.raise_login_required(metadata_available=False)

        # Start extracting information
        tags = None
        if webpage:
            # use og:video:tag (not logged in)
            og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
            tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
            if not tags:
                # use keywords and split with comma (not logged in)
                kwds = self._html_search_meta('keywords', webpage, default=None)
                if kwds:
                    tags = [x for x in kwds.split(',') if x]
        if not tags:
            # find in json (logged in)
            tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))

        thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])

        def get_video_info(*items, get_first=True, **kwargs):
            return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)

        return {
            'id': video_id,
            '_api_data': api_data,
            'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
            'formats': formats,
            'availability': availability,
            'thumbnails': [{
                'id': thumb_id,
                'url': thumb_url,
                'ext': 'jpg',
                'preference': thumb_prefs(thumb_id),
                **parse_resolution(thumb_url, lenient=True),
            } for thumb_id, thumb_url in (get_video_info('thumbnail') or {}).items() if thumb_url],
            'description': clean_html(get_video_info('description')),
            'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
            'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
            'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
                self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
            'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
            'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
            'view_count': int_or_none(get_video_info('count', 'view')),
            'tags': tags,
            'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
            'comment_count': get_video_info('count', 'comment', expected_type=int),
            'duration': (
                parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
                or get_video_info('duration')),
            'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
            'subtitles': self.extract_subtitles(video_id, api_data),
        }

    def _get_subtitles(self, video_id, api_data):
        """Download the video's comment threads and expose them as a JSON "comments" subtitle track.

        Returns None when the API data names no comment server.
        """
        comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
        if not comments_info.get('server'):
            return

        danmaku = traverse_obj(self._download_json(
            f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({
                'additionals': {},
                'params': comments_info.get('params'),
                'threadKey': comments_info.get('threadKey'),
            }).encode(), fatal=False,
            headers={
                'Referer': 'https://www.nicovideo.jp/',
                'Origin': 'https://www.nicovideo.jp',
                'Content-Type': 'text/plain;charset=UTF-8',
                'x-client-os-type': 'others',
                'x-frontend-id': '6',
                'x-frontend-version': '0',
            },
            note='Downloading comments', errnote='Failed to download comments'),
            ('data', 'threads', ..., 'comments', ...))

        return {
            'comments': [{
                'ext': 'json',
                'data': json.dumps(danmaku),
            }],
        }
2022-03-17 07:22:14 -05:00
2014-10-29 05:04:48 -05:00
2022-03-02 11:08:13 -06:00
class NiconicoPlaylistBaseIE(InfoExtractor):
    """Shared paging logic for list-like extractors; subclasses supply `_call_api`."""

    # Number of items requested per API page
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
        'X-Niconico-Language': 'en-us',
    }

    def _call_api(self, list_id, resource, query):
        """Fetch list data for *list_id*; *resource* is used by subclasses to label the request."""
        raise NotImplementedError('Must be implemented in subclasses')

    @staticmethod
    def _parse_owner(item):
        """Return the ``uploader``/``uploader_id`` fields found under ``item['owner']``."""
        return {
            'uploader': traverse_obj(item, ('owner', ('name', ('user', 'nickname')), {str}, any)),
            'uploader_id': traverse_obj(item, ('owner', 'id', {str})),
        }

    def _fetch_page(self, list_id, page):
        """Yield ``url``-type entries for one zero-based *page* of the list."""
        # The page number sent to the API is one-based
        page += 1
        resp = self._call_api(list_id, f'page {page}', {
            'page': page,
            'pageSize': self._PAGE_SIZE,
        })
        # this is needed to support both mylist and user
        for video in traverse_obj(resp, ('items', ..., ('video', None))) or []:
            video_id = video.get('id')
            if not video_id:
                # skip {"video": {"id": "blablabla", ...}}
                continue
            count = video.get('count') or {}
            get_count = lambda x: int_or_none(count.get(x))
            yield {
                '_type': 'url',
                'id': video_id,
                'title': video.get('title'),
                'url': f'https://www.nicovideo.jp/watch/{video_id}',
                'description': video.get('shortDescription'),
                'duration': int_or_none(video.get('duration')),
                'view_count': get_count('view'),
                'comment_count': get_count('comment'),
                # Branching over the candidate keys yields a list; `any` picks the
                # first valid URL so that `thumbnail` is a single string (or None)
                'thumbnail': traverse_obj(video, (
                    'thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'), {url_or_none}, any)),
                'ie_key': NiconicoIE.ie_key(),
                **self._parse_owner(video),
            }

    def _entries(self, list_id):
        """Return a lazily paged list of the entries of *list_id*."""
        return OnDemandPagedList(functools.partial(self._fetch_page, list_id), self._PAGE_SIZE)
class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
    """Extractor for Niconico "mylist" playlists."""

    IE_NAME = 'niconico:playlist'
    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://www.nicovideo.jp/mylist/27411728',
            'info_dict': {
                'id': '27411728',
                'title': 'AKB48のオールナイトニッポン',
                'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
                'uploader': 'のっく',
                'uploader_id': '805442',
            },
            'playlist_mincount': 291,
        },
        {
            'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
            'only_matching': True,
        },
        {
            'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
            'only_matching': True,
        },
    ]

    def _call_api(self, list_id, resource, query):
        """Download the mylist endpoint and return its `data.mylist` object."""
        api_url = f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}'
        response = self._download_json(
            api_url, list_id, f'Downloading {resource}',
            query=query, headers=self._API_HEADERS)
        return response['data']['mylist']

    def _real_extract(self, url):
        list_id = self._match_id(url)
        # Only the list metadata is needed here, so request a single item
        mylist = self._call_api(list_id, 'list', {'pageSize': 1})
        title, description = mylist.get('name'), mylist.get('description')
        owner = self._parse_owner(mylist)
        return self.playlist_result(
            self._entries(list_id), list_id, title, description, **owner)
2021-02-10 00:45:20 -06:00
2025-01-25 19:47:15 -06:00
class NiconicoSeriesIE(NiconicoPlaylistBaseIE):
    """Extractor for Niconico video series."""

    IE_NAME = 'niconico:series'
    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://www.nicovideo.jp/user/44113208/series/110226',
            'info_dict': {
                'id': '110226',
                'title': 'ご立派ァ!のシリーズ',
                'description': '楽しそうな外人の吹き替えをさせたら終身名誉ホモガキの右に出る人はいませんね…',
                'uploader': 'アルファるふぁ',
                'uploader_id': '44113208',
            },
            'playlist_mincount': 10,
        },
        {
            'url': 'https://www.nicovideo.jp/series/12312/',
            'info_dict': {
                'id': '12312',
                'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
                'description': '',
                'uploader': '野鳥',
                'uploader_id': '2275360',
            },
            'playlist_mincount': 103,
        },
        {
            'url': 'https://nico.ms/series/203559',
            'only_matching': True,
        },
    ]

    def _call_api(self, list_id, resource, query):
        """Download the series endpoint and return its `data` object."""
        api_url = f'https://nvapi.nicovideo.jp/v2/series/{list_id}'
        response = self._download_json(
            api_url, list_id, f'Downloading {resource}',
            query=query, headers=self._API_HEADERS)
        return response['data']

    def _real_extract(self, url):
        list_id = self._match_id(url)
        # Series metadata lives under `detail`; one item is enough to obtain it
        series = self._call_api(list_id, 'list', {'pageSize': 1})['detail']
        title, description = series.get('title'), series.get('description')
        owner = self._parse_owner(series)
        return self.playlist_result(
            self._entries(list_id), list_id, title, description, **owner)
2022-03-02 11:08:13 -06:00
class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
    """Extractor for the logged-in user's watch history and liked videos."""

    IE_NAME = 'niconico:history'
    IE_DESC = 'NicoNico user history or likes. Requires cookies.'
    _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/(?P<id>history(?:/like)?)'

    _TESTS = [
        {
            'note': 'PC page, with /video',
            'url': 'https://www.nicovideo.jp/my/history/video',
            'only_matching': True,
        },
        {
            'note': 'PC page, without /video',
            'url': 'https://www.nicovideo.jp/my/history',
            'only_matching': True,
        },
        {
            'note': 'mobile page, with /video',
            'url': 'https://sp.nicovideo.jp/my/history/video',
            'only_matching': True,
        },
        {
            'note': 'mobile page, without /video',
            'url': 'https://sp.nicovideo.jp/my/history',
            'only_matching': True,
        },
        {
            'note': 'PC page',
            'url': 'https://www.nicovideo.jp/my/history/like',
            'only_matching': True,
        },
        {
            'note': 'Mobile page',
            'url': 'https://sp.nicovideo.jp/my/history/like',
            'only_matching': True,
        },
    ]

    def _call_api(self, list_id, resource, query):
        """Download the likes or watch-history endpoint and return its `data` object."""
        # The matched id is either 'history' or 'history/like'
        if list_id == 'history/like':
            path = 'likes'
        else:
            path = 'watch/history'
        response = self._download_json(
            f'https://nvapi.nicovideo.jp/v1/users/me/{path}', list_id,
            f'Downloading {resource}', query=query, headers=self._API_HEADERS)
        return response['data']

    def _real_extract(self, url):
        list_id = self._match_id(url)
        try:
            mylist = self._call_api(list_id, 'list', {'pageSize': 1})
        except ExtractorError as e:
            cause = e.cause
            # HTTP 401 is reported as a login requirement; anything else is re-raised
            if isinstance(cause, HTTPError) and cause.status == 401:
                self.raise_login_required('You have to be logged in to get your history')
            raise
        owner = self._parse_owner(mylist)
        return self.playlist_result(self._entries(list_id), list_id, **owner)
2021-03-20 04:07:57 -05:00
2021-12-02 15:46:08 -06:00
class NicovideoSearchBaseIE(InfoExtractor):
    """Common scraping logic for Niconico search and tag result pages."""

    # Path component of the result page URL; overridden by subclasses
    _SEARCH_TYPE = 'search'

    def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
        """Yield a `url` result for every video id found on the result pages of `url`.

        If `query` contains 'page', only that page is fetched; otherwise pages
        are fetched from 1 upwards until one yields no video ids.
        `note` may contain a `%(page)s` placeholder; None selects the default.
        """
        # Work on a copy: 'page' is overwritten below and must not leak
        # into the dict owned by the caller
        query = dict(query or {})
        if note is None:
            # `None % {...}` would raise TypeError
            note = 'Downloading page %(page)s'
        pages = [query['page']] if 'page' in query else itertools.count(1)
        for page_num in pages:
            query['page'] = str(page_num)
            webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
            results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
            for item in results:
                yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item)
            if not results:
                break

    def _search_results(self, query):
        """Return the entries of the result page for the search term `query`."""
        return self._entries(
            self._proto_relative_url(f'//www.nicovideo.jp/{self._SEARCH_TYPE}/{query}'), query)
2021-12-06 11:54:42 -06:00
2021-12-02 15:46:08 -06:00
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Search extractor registered under the `nicosearch` search key."""

    _SEARCH_KEY = 'nicosearch'
    IE_NAME = 'nicovideo:search'
    IE_DESC = 'Nico video search'
2021-08-27 20:37:13 -05:00
2021-12-02 15:46:08 -06:00
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
    """Extractor for Niconico search result page URLs."""

    IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
    IE_DESC = 'Nico video search URLs'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'

    _TESTS = [
        {
            'url': 'http://www.nicovideo.jp/search/sm9',
            'info_dict': {
                'id': 'sm9',
                'title': 'sm9',
            },
            'playlist_mincount': 40,
        },
        {
            'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
            'info_dict': {
                'id': 'sm9',
                'title': 'sm9',
            },
            'playlist_count': 31,
        },
    ]

    def _real_extract(self, url):
        # The search term serves as both the playlist id and its title
        search_term = self._match_id(url)
        entries = self._entries(url, search_term)
        return self.playlist_result(entries, search_term, search_term)
2021-12-02 15:46:08 -06:00
class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Search extractor that walks results newest first by splitting the date range.

    A date range is bisected whenever its page number `_MAX_PAGES` is still
    full, i.e. whenever the range may hold more results than can be paged.
    """

    IE_DESC = 'Nico video search, newest first'
    IE_NAME = f'{NicovideoSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'nicosearchdate'
    _TESTS = [{
        'url': 'nicosearchdateall:a',
        'info_dict': {
            'id': 'a',
            'title': 'a',
        },
        'playlist_mincount': 1610,
    }]

    _START_DATE = dt.date(2007, 1, 1)
    _RESULTS_PER_PAGE = 32
    _MAX_PAGES = 50

    def _entries(self, url, item_id, start_date=None, end_date=None):
        """Yield all results between `start_date` and `end_date`, later dates first.

        `start_date` defaults to `_START_DATE` and `end_date` to today.
        """
        start_date, end_date = start_date or self._START_DATE, end_date or dt.datetime.now().date()

        # If the last page has a full page of videos, we need to break down the query interval further
        last_page_len = sum(1 for _ in self._get_entries_for_date(
            url, item_id, start_date, end_date, self._MAX_PAGES,
            note=f'Checking number of videos from {start_date} to {end_date}'))
        if last_page_len == self._RESULTS_PER_PAGE and start_date != end_date:
            midpoint = start_date + ((end_date - start_date) // 2)
            # The later half starts the day AFTER the midpoint. For a two-day
            # interval `midpoint == start_date` (date arithmetic drops sub-day
            # parts), so reusing the midpoint would recurse on the very same
            # interval forever. The single-day query below uses start == end,
            # so the bounds are treated as inclusive here, and reusing the
            # midpoint would also yield that day's videos twice.
            yield from self._entries(url, item_id, midpoint + dt.timedelta(days=1), end_date)
            yield from self._entries(url, item_id, start_date, midpoint)
        else:
            self.to_screen(f'{item_id}: Downloading results from {start_date} to {end_date}')
            yield from self._get_entries_for_date(
                url, item_id, start_date, end_date, note='Downloading page %(page)s')

    def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
        """Yield results for one date range, optionally restricted to page `page_num`.

        `end_date` defaults to `start_date`, i.e. a single-day query.
        """
        query = {
            'start': str(start_date),
            'end': str(end_date or start_date),
            'sort': 'f',
            'order': 'd',
        }
        if page_num:
            query['page'] = str(page_num)

        # Forward `note` only when given, so that the base class default applies
        # instead of a None the base method would try to %-format
        extra = {} if note is None else {'note': note}
        yield from super()._entries(url, item_id, query=query, **extra)
2021-08-27 20:37:13 -05:00
2022-02-15 11:12:39 -06:00
class NicovideoTagURLIE(NicovideoSearchBaseIE):
    """Extractor for Niconico tag page URLs."""

    IE_NAME = 'niconico:tag'
    IE_DESC = 'NicoNico video tag URLs'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/tag/(?P<id>[^?#&]+)?'
    _SEARCH_TYPE = 'tag'

    _TESTS = [
        {
            'url': 'https://www.nicovideo.jp/tag/ドキュメンタリー淫夢',
            'info_dict': {
                'id': 'ドキュメンタリー淫夢',
                'title': 'ドキュメンタリー淫夢',
            },
            'playlist_mincount': 400,
        },
    ]

    def _real_extract(self, url):
        # The tag serves as both the playlist id and its title
        tag = self._match_id(url)
        entries = self._entries(url, tag)
        return self.playlist_result(entries, tag, tag)
2021-03-20 04:07:57 -05:00
class NiconicoUserIE(InfoExtractor):
    """Extractor for the list of videos uploaded by a Niconico user."""

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)(?:/video)?/?(?:$|[#?])'
    _TEST = {
        'url': 'https://www.nicovideo.jp/user/419948',
        'info_dict': {
            'id': '419948',
        },
        'playlist_mincount': 101,
    }
    # Placeholders, in order: user id, page size, 1-based page number
    _API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s'
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
    }

    def _entries(self, list_id):
        """Yield a `url` result for every video of user `list_id`.

        Pages are requested until the reported `totalCount` has been reached
        or a page comes back empty.
        """
        total_count = None
        count = 0
        for page_num in itertools.count(1):
            data = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num), list_id,
                headers=self._API_HEADERS,
                # Show the page number that is actually requested
                note='Downloading JSON metadata%s' % (f' page {page_num}' if page_num > 1 else ''))['data']
            if total_count is None:
                # May stay None if the API omits it; the empty-page check below
                # then terminates the loop on its own
                total_count = int_or_none(data.get('totalCount'))
            items = data['items']
            for entry in items:
                count += 1
                yield self.url_result(
                    f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE)
            # An empty page means nothing more will come, whatever `totalCount`
            # claims; without this check the loop would never end
            if not items or (total_count is not None and count >= total_count):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id)
2023-05-29 04:35:10 -05:00
class NiconicoLiveIE(InfoExtractor):
    """Extractor for Niconico live broadcasts.

    The HLS playlist URL is obtained over a WebSocket connection, which is
    left open and attached to every format under the 'ws' key.
    """

    IE_NAME = 'niconico:live'
    IE_DESC = 'ニコニコ生放送'
    _VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
    _TESTS = [{
        'note': 'this test case includes invisible characters for title, pasting them as-is',
        'url': 'https://live.nicovideo.jp/watch/lv339533123',
        'info_dict': {
            'id': 'lv339533123',
            'title': '激辛ペヤング食べます\u202a( ;ᯅ; )\u202c(歌枠オーディション参加中)',
            'view_count': 1526,
            'comment_count': 1772,
            'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます',
            'uploader': 'もか',
            'channel': 'ゲストさんのコミュニティ',
            'channel_id': 'co5776900',
            'channel_url': 'https://com.nicovideo.jp/community/co5776900',
            'timestamp': 1670677328,
            'is_live': True,
        },
        'skip': 'livestream',
    }, {
        'url': 'https://live2.nicovideo.jp/watch/lv339533123',
        'only_matching': True,
    }, {
        'url': 'https://sp.live.nicovideo.jp/watch/lv339533123',
        'only_matching': True,
    }, {
        'url': 'https://sp.live2.nicovideo.jp/watch/lv339533123',
        'only_matching': True,
    }]

    # Accepted values of the `latency` extractor argument
    _KNOWN_LATENCY = ('high', 'low')

    def _wait_for_stream(self, ws):
        """Read WebSocket messages until the stream is announced.

        Returns the tuple (m3u8 URL, available qualities).
        Raises ExtractorError if the server sends a 'disconnect' or 'error' message.
        """
        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = json.loads(recv)
            if not isinstance(data, dict):
                continue
            msg_type = data.get('type')
            if msg_type == 'stream':
                return data['data']['uri'], data['data']['availableQualities']
            elif msg_type == 'disconnect':
                self.write_debug(recv)
                raise ExtractorError('Disconnected at middle of extraction')
            elif msg_type == 'error':
                self.write_debug(recv)
                message = traverse_obj(data, ('body', 'code')) or recv
                raise ExtractorError(message)
            elif self.get_param('verbose', False):
                # Other message types are only logged, truncated to 100 characters
                if len(recv) > 100:
                    recv = recv[:100] + '...'
                self.write_debug(f'Server said: {recv}')

    @staticmethod
    def _extract_thumbnails(embedded_data):
        """Build the thumbnail list from `program.thumbnail` of the embedded page data."""
        thumbnails = []
        raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
        for name, value in raw_thumbs.items():
            if not isinstance(value, dict):
                # Plain entry: `value` itself is used as the URL
                thumbnails.append({
                    'id': name,
                    'url': value,
                    **parse_resolution(value, lenient=True),
                })
                continue

            # Nested entry: one image URL per key; the resolution is parsed
            # from the key first, then from the URL
            for k, img_url in value.items():
                res = parse_resolution(k, lenient=True) or parse_resolution(img_url, lenient=True)
                width, height = res.get('width'), res.get('height')
                thumbnails.append({
                    # Leave out unknown dimensions rather than embedding "None" in the id
                    'id': join_nonempty(name, join_nonempty(width, height, delim='x'), delim='_'),
                    'url': img_url,
                    'ext': traverse_obj(parse_qs(img_url), ('image', 0, {determine_ext(default_ext='jpg')})),
                    **res,
                })
        return thumbnails

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)

        embedded_data = self._parse_json(unescapeHTML(self._search_regex(
            r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)

        ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
        if not ws_url:
            raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
        ws_url = update_url_query(ws_url, {
            'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
        })

        # The Origin header uses the host the watch page resolved to, without the 'sp.' prefix
        hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')

        latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
        if latency not in self._KNOWN_LATENCY:
            latency = 'high'

        ws = self._request_webpage(
            Request(ws_url, headers={'Origin': f'https://{hostname}'}),
            video_id=video_id, note='Connecting to WebSocket server')

        self.write_debug('Sending HLS server request')
        ws.send(json.dumps({
            'type': 'startWatching',
            'data': {
                'stream': {
                    'quality': 'abr',
                    'protocol': 'hls+fmp4',
                    'latency': latency,
                    'chasePlay': False,
                },
                'room': {
                    'protocol': 'webSocket',
                    'commentable': True,
                },
                'reconnect': False,
            },
        }))

        # Named so as not to shadow the `qualities` helper imported at file level
        m3u8_url, available_qualities = self._wait_for_stream(ws)

        title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
            ('og:title', 'twitter:title'), webpage, 'live title', fatal=False)

        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
        # Formats are paired with the reversed quality names, skipping the first
        # one (presumably the 'abr' pseudo-quality — TODO confirm)
        for fmt, q in zip(formats, reversed(available_qualities[1:])):
            fmt.update({
                'format_id': q,
                'protocol': 'niconico_live',
                'ws': ws,
                'video_id': video_id,
                'live_latency': latency,
                'origin': hostname,
            })

        return {
            'id': video_id,
            'title': title,
            **traverse_obj(embedded_data, {
                'view_count': ('program', 'statistics', 'watchCount'),
                'comment_count': ('program', 'statistics', 'commentCount'),
                'uploader': ('program', 'supplier', 'name'),
                'channel': ('socialGroup', 'name'),
                'channel_id': ('socialGroup', 'id'),
                'channel_url': ('socialGroup', 'socialGroupPageUrl'),
            }),
            'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))),
            'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))),
            'is_live': True,
            'thumbnails': self._extract_thumbnails(embedded_data),
            'formats': formats,
        }