# yt-dlp/yt_dlp/extractor/firsttv.py

import urllib.parse

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    join_nonempty,
    mimetype2ext,
    parse_qs,
    unified_strdate,
    url_or_none,
)
from ..utils.traversal import traverse_obj


class FirstTVIE(InfoExtractor):
    # Extractor for pages on 1tv.ru and sport1tv.ru. The last path segment of
    # the URL is used as the display id; the actual media items come from a
    # JSON playlist referenced by the page.
    IE_NAME = '1tv'
    IE_DESC = 'Первый канал'
    _VALID_URL = r'https?://(?:www\.)?(?:sport)?1tv\.ru/(?:[^/?#]+/)+(?P<id>[^/?#]+)'

    _TESTS = [{
        # single format; has item.id
        'url': 'https://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
        'md5': '8011ae8e88ff4150107ab9c5a8f5b659',
        'info_dict': {
            'id': '40049',
            'ext': 'mp4',
            'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'upload_date': '20150212',
            'duration': 2694,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # multiple formats; has item.id
        'url': 'https://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
        'info_dict': {
            'id': '364746',
            'ext': 'mp4',
            'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'upload_date': '20160407',
            'duration': 179,
            'formats': 'mincount:3',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.1tv.ru/news/issue/2016-12-01/14:00',
        'info_dict': {
            'id': '14:00',
            'title': 'Выпуск программы «Время» в 20:00   1 декабря 2016 года. Новости. Первый канал',
            'thumbnail': 'https://static.1tv.ru/uploads/photo/image/8/big/338448_big_8fc7eb236f.jpg',
        },
        'playlist_count': 13,
    }, {
        # has timestamp; has item.uid but not item.id
        'url': 'https://www.1tv.ru/shows/segodnya-vecherom/vypuski/avtory-odnogo-hita-segodnya-vecherom-vypusk-ot-03-05-2025',
        'info_dict': {
            'id': '270411',
            'ext': 'mp4',
            'title': 'Авторы одного хита. Сегодня вечером. Выпуск от 03.05.2025',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'timestamp': 1746286020,
            'upload_date': '20250503',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
        'only_matching': True,
    }, {
        'url': 'https://www.sport1tv.ru/sport/chempionat-rossii-po-figurnomu-kataniyu-2025',
        'only_matching': True,
    }]

    def _entries(self, items):
        """Yield one info dict per playlist item.

        The video id is the item's 'id' when present and truthy, otherwise
        its 'uid'. Formats are built from the item's 'sources' entries: HLS
        and DASH manifests are expanded, anything else becomes a single
        direct-HTTP format.
        """
        for item in items:
            video_id = str(item.get('id') or item['uid'])
            formats = []
            subtitles = {}

            # Only sources whose 'src' is a valid URL are considered
            valid_sources = traverse_obj(item, ('sources', lambda _, v: url_or_none(v['src'])))
            for source in valid_sources:
                media_url = source['src']
                # Prefer the declared MIME type; fall back to the URL's extension
                ext = mimetype2ext(source.get('type'), default=determine_ext(media_url))

                if ext not in ('m3u8', 'mpd'):
                    # Direct HTTP file: the bitrate is read from a trailing
                    # "_<digits>.<ext>" in the URL when there is one
                    tbr = self._search_regex(fr'_(\d{{3,}})\.{ext}', media_url, 'tbr', default=None)
                    formats.append({
                        'url': media_url,
                        'ext': ext,
                        'format_id': join_nonempty('http', ext, tbr),
                        'tbr': int_or_none(tbr),
                        # quality metadata of http formats may be incorrect
                        'quality': -10,
                    })
                    continue

                # Manifest-based source; extraction failures are non-fatal
                if ext == 'm3u8':
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                else:
                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        media_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

            metadata = traverse_obj(item, {
                'title': ('title', {str}),
                'thumbnail': ('poster', {url_or_none}),
                'timestamp': ('dvr_begin_at', {int_or_none}),
                'upload_date': ('date_air', {unified_strdate}),
                'duration': ('duration', {int_or_none}),
            })
            yield {
                **metadata,
                'id': video_id,
                'formats': formats,
                'subtitles': subtitles,
            }

    def _real_extract(self, url):
        """Resolve the page's playlist and return it as a playlist result.

        The playlist URL is taken from the page's data-playlist-url
        attribute. When that URL's query string names specific item ids,
        only playlist items whose 'uid' is among them are kept.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        playlist_ref = self._html_search_regex(
            r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'playlist url', group='url')
        # The attribute may hold a relative reference, so resolve it against the page URL
        playlist_url = urllib.parse.urljoin(url, playlist_ref)

        wanted_ids = traverse_obj(parse_qs(playlist_url), 'video_id', 'videos_ids[]', 'news_ids[]')

        def is_wanted(_, entry):
            # Items need a truthy 'uid'; with no ids requested, all such items pass
            uid = entry['uid']
            if not uid:
                return False
            if not wanted_ids:
                return True
            return str(uid) in wanted_ids

        playlist = self._download_json(playlist_url, display_id)
        items = traverse_obj(playlist, is_wanted)

        playlist_title = self._og_search_title(webpage, default=None)
        playlist_thumbnail = self._og_search_thumbnail(webpage, default=None)
        return self.playlist_result(
            self._entries(items), display_id, playlist_title,
            thumbnail=playlist_thumbnail)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 18:09:58 -05:00
+								import urllib.parse
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
+								from .common import InfoExtractor
-												[1tv] Fix extraction (Closes #9103)

											
										
										
											2016-04-09 16:02:35 -05:00
+								from ..utils import (
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								    determine_ext,
-												[1tv] Fix extraction (Closes #9103)

											
										
										
											2016-04-09 16:02:35 -05:00
+								    int_or_none,
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								    join_nonempty,
 								    mimetype2ext,
 								    parse_qs,
-												[1tv] Fix extraction (Closes #9103)

											
										
										
											2016-04-09 16:02:35 -05:00
+								    unified_strdate,
-												Improve URL extraction

											
										
										
											2018-07-21 07:08:28 -05:00
+								    url_or_none,
-												[1tv] Fix extraction (Closes #9103)

											
										
										
											2016-04-09 16:02:35 -05:00
+								)
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								from ..utils.traversal import traverse_obj
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
 								class FirstTVIE(InfoExtractor):
-												[1tv] Cover arbitraty URLs

											
										
										
											2015-02-13 14:04:28 -06:00
+								    IE_NAME = '1tv'
 								    IE_DESC = 'Первый канал'
-												[ie/1tv] Support sport1tv.ru domain (#11889)

Closes #11894
Authored by: kvk-2015
											
										
										
											2025-01-25 15:21:45 -06:00
+								    _VALID_URL = r'https?://(?:www\.)?(?:sport)?1tv\.ru/(?:[^/?#]+/)+(?P<id>[^/?#]+)'
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
-												[1tv] Cover arbitraty URLs

											
										
										
											2015-02-13 14:04:28 -06:00
+								    _TESTS = [{
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        # single format; has item.id
 								        'url': 'https://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
 								        'md5': '8011ae8e88ff4150107ab9c5a8f5b659',
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
+								        'info_dict': {
-												[firsttv] fix extraction(closes #9249)

											
										
										
											2016-08-21 11:55:47 -05:00
+								            'id': '40049',
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
+								            'ext': 'mp4',
-												[1tv] Improve extraction and add support for playlists (closes #11335)

											
										
										
											2016-12-04 10:20:14 -06:00
+								            'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								            'thumbnail': r're:https?://.+/.+\.jpg',
-												[1tv] Fix extraction (Closes #9103)

											
										
										
											2016-04-09 16:02:35 -05:00
+								            'upload_date': '20150212',
 								            'duration': 2694,
-												[firsttv] Skip test
											
										
										
											2014-02-10 21:26:52 -06:00
+								        },
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        'params': {'skip_download': 'm3u8'},
-												[1tv] Cover arbitraty URLs

											
										
										
											2015-02-13 14:04:28 -06:00
+								    }, {
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        # multiple formats; has item.id
 								        'url': 'https://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
-												[firsttv] keep a test videos with multiple formats

											
										
										
											2016-08-21 13:11:51 -05:00
+								        'info_dict': {
 								            'id': '364746',
 								            'ext': 'mp4',
-												[1tv] Improve extraction and add support for playlists (closes #11335)

											
										
										
											2016-12-04 10:20:14 -06:00
+								            'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								            'thumbnail': r're:https?://.+/.+\.jpg',
-												[firsttv] keep a test videos with multiple formats

											
										
										
											2016-08-21 13:11:51 -05:00
+								            'upload_date': '20160407',
 								            'duration': 179,
 								            'formats': 'mincount:3',
 								        },
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        'params': {'skip_download': 'm3u8'},
-												[1tv] Improve extraction and add support for playlists (closes #11335)

											
										
										
											2016-12-04 10:20:14 -06:00
+								    }, {
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        'url': 'https://www.1tv.ru/news/issue/2016-12-01/14:00',
-												[1tv] Improve extraction and add support for playlists (closes #11335)

											
										
										
											2016-12-04 10:20:14 -06:00
+								        'info_dict': {
 								            'id': '14:00',
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								            'title': 'Выпуск программы «Время» в 20:00   1 декабря 2016 года. Новости. Первый канал',
 								            'thumbnail': 'https://static.1tv.ru/uploads/photo/image/8/big/338448_big_8fc7eb236f.jpg',
-												[1tv] Improve extraction and add support for playlists (closes #11335)

											
										
										
											2016-12-04 10:20:14 -06:00
+								        },
 								        'playlist_count': 13,
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								    }, {
 								        # has timestamp; has item.uid but not item.id
 								        'url': 'https://www.1tv.ru/shows/segodnya-vecherom/vypuski/avtory-odnogo-hita-segodnya-vecherom-vypusk-ot-03-05-2025',
 								        'info_dict': {
 								            'id': '270411',
 								            'ext': 'mp4',
 								            'title': 'Авторы одного хита. Сегодня вечером. Выпуск от 03.05.2025',
 								            'thumbnail': r're:https?://.+/.+\.jpg',
 								            'timestamp': 1746286020,
 								            'upload_date': '20250503',
 								        },
 								        'params': {'skip_download': 'm3u8'},
-												[1tv] Improve extraction and add support for playlists (closes #11335)

											
										
										
											2016-12-04 10:20:14 -06:00
+								    }, {
 								        'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
 								        'only_matching': True,
-												[ie/1tv] Support sport1tv.ru domain (#11889)

Closes #11894
Authored by: kvk-2015
											
										
										
											2025-01-25 15:21:45 -06:00
+								    }, {
 								        'url': 'https://www.sport1tv.ru/sport/chempionat-rossii-po-figurnomu-kataniyu-2025',
 								        'only_matching': True,
-												[1tv] Cover arbitraty URLs

											
										
										
											2015-02-13 14:04:28 -06:00
+								    }]
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								    def _entries(self, items):
 								        for item in items:
 								            video_id = str(item.get('id') or item['uid'])
 								            formats, subtitles = [], {}
 								            for f in traverse_obj(item, ('sources', lambda _, v: url_or_none(v['src']))):
 								                src = f['src']
 								                ext = mimetype2ext(f.get('type'), default=determine_ext(src))
 								                if ext == 'm3u8':
 								                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
 								                        src, video_id, 'mp4', m3u8_id='hls', fatal=False)
 								                elif ext == 'mpd':
 								                    fmts, subs = self._extract_mpd_formats_and_subtitles(
 								                        src, video_id, mpd_id='dash', fatal=False)
 								                else:
 								                    tbr = self._search_regex(fr'_(\d{{3,}})\.{ext}', src, 'tbr', default=None)
 								                    formats.append({
 								                        'url': src,
 								                        'ext': ext,
 								                        'format_id': join_nonempty('http', ext, tbr),
 								                        'tbr': int_or_none(tbr),
 								                        # quality metadata of http formats may be incorrect
 								                        'quality': -10,
 								                    })
 								                    continue
 								                formats.extend(fmts)
 								                self._merge_subtitles(subs, target=subtitles)
 								            yield {
 								                **traverse_obj(item, {
 								                    'title': ('title', {str}),
 								                    'thumbnail': ('poster', {url_or_none}),
 								                    'timestamp': ('dvr_begin_at', {int_or_none}),
 								                    'upload_date': ('date_air', {unified_strdate}),
 								                    'duration': ('duration', {int_or_none}),
 								                }),
 								                'id': video_id,
 								                'formats': formats,
 								                'subtitles': subtitles,
 								            }
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
+								    def _real_extract(self, url):
-												[firsttv] fix extraction(closes #9249)

											
										
										
											2016-08-21 11:55:47 -05:00
+								        display_id = self._match_id(url)
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
-												[firsttv] fix extraction(closes #9249)

											
										
										
											2016-08-21 11:55:47 -05:00
+								        webpage = self._download_webpage(url, display_id)
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        playlist_url = urllib.parse.urljoin(url, self._html_search_regex(
-												[1tv] Improve extraction and add support for playlists (closes #11335)

											
										
										
											2016-12-04 10:20:14 -06:00
+								            r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
 								            webpage, 'playlist url', group='url'))
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        item_ids = traverse_obj(parse_qs(playlist_url), 'video_id', 'videos_ids[]', 'news_ids[]')
 								        items = traverse_obj(
 								            self._download_json(playlist_url, display_id),
 								            lambda _, v: v['uid'] and (str(v['uid']) in item_ids if item_ids else True))
-												[firsttv] Add support for 1tv.ru videoarchive
											
										
										
											2014-02-10 11:20:41 -06:00
-												[ie/1tv] Fix extractor (#13168)

Closes #13167
Authored by: bashonly
											
										
										
											2025-05-16 18:16:03 -05:00
+								        return self.playlist_result(
 								            self._entries(items), display_id, self._og_search_title(webpage, default=None),
 								            thumbnail=self._og_search_thumbnail(webpage, default=None))