From 4af718374e82e55460ae90fbf7282756556a31ec Mon Sep 17 00:00:00 2001
From: kclauhk <78251477+kclauhk@users.noreply.github.com>
Date: Sat, 31 Aug 2024 18:35:39 +0800
Subject: [PATCH 1/5] [ie/vmware] Add extractor

---
 yt_dlp/extractor/_extractors.py |   4 ++
 yt_dlp/extractor/brightcove.py  |   3 +-
 yt_dlp/extractor/vmware.py      | 100 ++++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 yt_dlp/extractor/vmware.py
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index a3610dc976..891ca84ccc 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2365,6 +2365,10 @@
     VKUserVideosIE,
     VKWallPostIE,
 )
+from .vmware import (
+    VMwareExploreIE,
+    VMwareExploreSearchIE,
+)
 from .vocaroo import VocarooIE
 from .vodpl import VODPlIE
 from .vodplatform import VODPlatformIE
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 2526f25dac..e621046253 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -600,7 +600,8 @@ def build_format_id(kind):
         return {
             'id': video_id,
             'title': title,
-            'description': clean_html(json_data.get('description')),
+            'description': clean_html(join_nonempty('description', 'long_description',
+                                                    from_dict=json_data, delim='<br>')),
             'thumbnails': thumbnails,
             'duration': duration,
             'timestamp': parse_iso8601(json_data.get('published_at')),
diff --git a/yt_dlp/extractor/vmware.py b/yt_dlp/extractor/vmware.py
new file mode 100644
index 0000000000..f219a5c757
--- /dev/null
+++ b/yt_dlp/extractor/vmware.py
@@ -0,0 +1,100 @@
+import itertools
+
+from .common import InfoExtractor, SearchInfoExtractor
+
+
+class VMwareExploreIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vmware\.com/explore/video-library/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.vmware.com/explore/video-library/video/6360758183112',
+        'info_dict': {
+            'id': '6360758183112',
+            'ext': 'mp4',
+            'title': 'VCFB1440LV',
+            'description': r're:^All About vSphere 8: What\'s New in the Technology',
+            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6164421911001/cde65c5a-51ff-4a0c-905f-ed71e25c0f2c/main/1920x1080/22m53s824ms/match/image.jpg',
+            'tags': 'count:6',
+            'timestamp': 1724585610,
+            'upload_date': '20240825',
+            'uploader_id': '6164421911001',
+            'duration': 2747.648,
+        },
+    }, {
+        'url': 'https://www.vmware.com/explore/video-library/video/6360759173112',
+        'info_dict': {
+            'id': '6360759173112',
+            'ext': 'mp4',
+            'title': 'AODB1676LV',
+            'description': r're:^Automation, Analytics and Intelligence: Our Quest for Operational Excellence',
+            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6164421911001/56cc0c8e-9d51-4c25-9d97-4b7364989c47/main/1920x1080/14m18s858ms/match/image.jpg',
+            'tags': 'count:6',
+            'timestamp': 1724585574,
+            'upload_date': '20240825',
+            'uploader_id': '6164421911001',
+            'duration': 1717.717,
+        },
+    }, {
+        'url': 'https://www.vmware.com/explore/video-library/video/6360760732112',
+        'info_dict': {
+            'id': '6360760732112',
+            'ext': 'mp4',
+            'title': 'ANSB1976LV',
+            'description': r're:^The Conman of the Digital Era — Ransomware',
+            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6164421911001/4ec22e41-7812-49d9-9fc8-5dbcf1ef4b3c/main/1920x1080/22m36s555ms/match/image.jpg',
+            'tags': 'count:6',
+            'timestamp': 1724585612,
+            'upload_date': '20240825',
+            'uploader_id': '6164421911001',
+            'duration': 2713.11,
+        },
+    }]
+    BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/6164421911001/lUBT2rAMW_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % video_id, url_transparent=True)
+
+
+class VMwareExploreSearchIE(SearchInfoExtractor):
+    IE_NAME = 'VMwareExplore:search'
+    _SEARCH_KEY = 'vmwaresearch'
+    _TESTS = [{
+        'url': 'vmwaresearch10:*',
+        'playlist_count': 10,
+        'info_dict': {
+            'id': '*',
+            'title': '*',
+        },
+    }, {
+        'url': 'vmwaresearchall:ransomware',
+        'playlist_count': 15,
+        'info_dict': {
+            'id': 'ransomware',
+            'title': 'ransomware',
+        },
+    }]
+    _URL_TEMPLATE = 'https://www.vmware.com/explore/video-library/video/%s'
+
+    def _search_results(self, query):
+        def search_query(query, offset, limit, total_count):
+            # search api:
+            # https://www.vmware.com/api/nocache/tools/brightcove/search?q=%2B{query}%20%2Byear:2023:2024%20%20-vod_on_demand_publish:%22False%22%2Bcomplete:%22true%22%2Bstate:%22ACTIVE%22&limit=12&offset=0&sort=-updated_at&account=explore
+            return self._download_json(
+                'https://www.vmware.com/api/nocache/tools/brightcove/search', query,
+                note=f'Downloading result {offset + 1}-{min(offset + limit, total_count or 99999999)}', query={
+                    'q': f'+{query} -vod_on_demand_publish:"False"+complete:"true"+state:"ACTIVE"',
+                    'limit': limit,
+                    'offset': offset,
+                    'sort': 'updated_at',   # chronological ascending order. For descending order: '-updated_at'
+                    'account': 'explore',
+                })
+
+        limit, total_count = 100, None      # limit: maximum 100
+        for i in itertools.count():
+            search_results = search_query(query, i * limit, limit, total_count)
+            total_count = search_results.get('count', 0)
+            for video in search_results.get('videos', []):
+                if video_id := video.get('id'):
+                    yield self.url_result(self._URL_TEMPLATE % video_id)
+            if (i + 1) * limit >= total_count:
+                break

From e3309bc775474e191ad6426ebb69bc831ccadf65 Mon Sep 17 00:00:00 2001
From: kclauhk <78251477+kclauhk@users.noreply.github.com>
Date: Sat, 7 Sep 2024 16:37:07 +0800
Subject: [PATCH 2/5] Update test data

---
 yt_dlp/extractor/vmware.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/vmware.py b/yt_dlp/extractor/vmware.py
index f219a5c757..1a14ab0709 100644
--- a/yt_dlp/extractor/vmware.py
+++ b/yt_dlp/extractor/vmware.py
@@ -12,7 +12,7 @@ class VMwareExploreIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'VCFB1440LV',
             'description': r're:^All About vSphere 8: What\'s New in the Technology',
-            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6164421911001/cde65c5a-51ff-4a0c-905f-ed71e25c0f2c/main/1920x1080/22m53s824ms/match/image.jpg',
+            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/static/6164421911001/cde65c5a-51ff-4a0c-905f-ed71e25c0f2c/80d7489b-7b65-47d9-b30c-8056b132892f/1920x1080/match/image.jpg',
             'tags': 'count:6',
             'timestamp': 1724585610,
             'upload_date': '20240825',

From d4768b66eaea65c361d333a03bebbcec030bffde Mon Sep 17 00:00:00 2001
From: kclauhk <kc.lau@mail.com>
Date: Sun, 27 Oct 2024 03:07:03 +0800
Subject: [PATCH 3/5] Add support for VMware Video Library (not Explore)

---
 yt_dlp/extractor/_extractors.py |  3 +-
 yt_dlp/extractor/vmware.py      | 89 ++++++++++++++++++++++++---------
 2 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 891ca84ccc..008294becb 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2367,7 +2367,8 @@
 )
 from .vmware import (
     VMwareExploreIE,
-    VMwareExploreSearchIE,
+    VMwareIE,
+    VMwareSearchIE,
 )
 from .vocaroo import VocarooIE
 from .vodpl import VODPlIE
diff --git a/yt_dlp/extractor/vmware.py b/yt_dlp/extractor/vmware.py
index 1a14ab0709..2385cee294 100644
--- a/yt_dlp/extractor/vmware.py
+++ b/yt_dlp/extractor/vmware.py
@@ -3,6 +3,44 @@
 from .common import InfoExtractor, SearchInfoExtractor
 
 
+class VMwareIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vmware\.com/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.vmware.com/video/6362484671112',
+        'info_dict': {
+            'id': '6362484671112',
+            'ext': 'mp4',
+            'title': 'GCI Communications',
+            'description': '',
+            'thumbnail': r're:^https?://.*/image\.jpg',
+            'tags': [],
+            'timestamp': 1727345356,
+            'upload_date': '20240926',
+            'uploader_id': '6415665063001',
+            'duration': 106.283,
+        },
+    }, {
+        'url': 'https://www.vmware.com/video/6350300466112',
+        'info_dict': {
+            'id': '6350300466112',
+            'ext': 'mp4',
+            'title': 'VMware Private AI',
+            'description': r're:^Learn the significance of AI and Generative AI',
+            'thumbnail': r're:^https?://.*/image\.jpg',
+            'tags': 'count:8',
+            'timestamp': 1712293111,
+            'upload_date': '20240405',
+            'uploader_id': '6415665063001',
+            'duration': 3154.624,
+        },
+    }]
+    BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/6415665063001/83iWkhhmz_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % video_id, url_transparent=True)
+
+
 class VMwareExploreIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vmware\.com/explore/video-library/video/(?P<id>\d+)'
     _TESTS = [{
@@ -12,7 +50,7 @@ class VMwareExploreIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'VCFB1440LV',
             'description': r're:^All About vSphere 8: What\'s New in the Technology',
-            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/static/6164421911001/cde65c5a-51ff-4a0c-905f-ed71e25c0f2c/80d7489b-7b65-47d9-b30c-8056b132892f/1920x1080/match/image.jpg',
+            'thumbnail': r're:^https?://.*/image\.jpg',
             'tags': 'count:6',
             'timestamp': 1724585610,
             'upload_date': '20240825',
@@ -26,7 +64,7 @@ class VMwareExploreIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'AODB1676LV',
             'description': r're:^Automation, Analytics and Intelligence: Our Quest for Operational Excellence',
-            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6164421911001/56cc0c8e-9d51-4c25-9d97-4b7364989c47/main/1920x1080/14m18s858ms/match/image.jpg',
+            'thumbnail': r're:^https?://.*/image\.jpg',
             'tags': 'count:6',
             'timestamp': 1724585574,
             'upload_date': '20240825',
@@ -40,7 +78,7 @@ class VMwareExploreIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'ANSB1976LV',
             'description': r're:^The Conman of the Digital Era — Ransomware',
-            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6164421911001/4ec22e41-7812-49d9-9fc8-5dbcf1ef4b3c/main/1920x1080/22m36s555ms/match/image.jpg',
+            'thumbnail': r're:^https?://.*/image\.jpg',
             'tags': 'count:6',
             'timestamp': 1724585612,
             'upload_date': '20240825',
@@ -55,46 +93,51 @@ def _real_extract(self, url):
         return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % video_id, url_transparent=True)
 
 
-class VMwareExploreSearchIE(SearchInfoExtractor):
-    IE_NAME = 'VMwareExplore:search'
+class VMwareSearchIE(SearchInfoExtractor):
+    IE_NAME = 'VMware:search'
     _SEARCH_KEY = 'vmwaresearch'
     _TESTS = [{
         'url': 'vmwaresearch10:*',
-        'playlist_count': 10,
         'info_dict': {
             'id': '*',
             'title': '*',
         },
+        'playlist_count': 10,
     }, {
-        'url': 'vmwaresearchall:ransomware',
-        'playlist_count': 15,
+        'url': 'vmwaresearchall:uptime',
         'info_dict': {
-            'id': 'ransomware',
-            'title': 'ransomware',
+            'id': 'uptime',
+            'title': 'uptime',
         },
+        'playlist_mincount': 5,
     }]
-    _URL_TEMPLATE = 'https://www.vmware.com/explore/video-library/video/%s'
+    _LIBRARY_MAP = {
+        'explore': ('VMware Explore Video Library', 'https://www.vmware.com/explore/video-library/video/%s'),
+        'vmware': ('VMware Video Library', 'https://www.vmware.com/video/%s'),
+    }
 
     def _search_results(self, query):
-        def search_query(query, offset, limit, total_count):
+        def search_query(query, offset, limit, account):
             # search api:
             # https://www.vmware.com/api/nocache/tools/brightcove/search?q=%2B{query}%20%2Byear:2023:2024%20%20-vod_on_demand_publish:%22False%22%2Bcomplete:%22true%22%2Bstate:%22ACTIVE%22&limit=12&offset=0&sort=-updated_at&account=explore
             return self._download_json(
                 'https://www.vmware.com/api/nocache/tools/brightcove/search', query,
-                note=f'Downloading result {offset + 1}-{min(offset + limit, total_count or 99999999)}', query={
+                note=f'Searching videos in {self._LIBRARY_MAP[account][0]}', query={
                     'q': f'+{query} -vod_on_demand_publish:"False"+complete:"true"+state:"ACTIVE"',
                     'limit': limit,
                     'offset': offset,
                     'sort': 'updated_at',   # chronological ascending order. For descending order: '-updated_at'
-                    'account': 'explore',
+                    'account': account,
                 })
 
-        limit, total_count = 100, None      # limit: maximum 100
-        for i in itertools.count():
-            search_results = search_query(query, i * limit, limit, total_count)
-            total_count = search_results.get('count', 0)
-            for video in search_results.get('videos', []):
-                if video_id := video.get('id'):
-                    yield self.url_result(self._URL_TEMPLATE % video_id)
-            if (i + 1) * limit >= total_count:
-                break
+        for account in ['explore', 'vmware']:
+            limit, total_count = 100, None      # limit: maximum 100
+            for i in itertools.count():
+                search_results = search_query(query, i * limit, limit, account)
+                total_count = search_results.get('count', 0)
+                for video in search_results.get('videos', []):
+                    if video_id := video.get('id'):
+                        yield self.url_result(self._LIBRARY_MAP[account][1] % video_id)
+                if (i + 1) * limit >= total_count:
+                    self.to_screen(f'{query}: {total_count} video(s) found')
+                    break

From 183ebedd70f99f9e718649f4be40d1d38af566bb Mon Sep 17 00:00:00 2001
From: kclauhk <78251477+kclauhk@users.noreply.github.com>
Date: Thu, 6 Feb 2025 23:42:34 +0800
Subject: [PATCH 4/5] Update search API URL

---
 yt_dlp/extractor/vmware.py | 64 +++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 22 deletions(-)

diff --git a/yt_dlp/extractor/vmware.py b/yt_dlp/extractor/vmware.py
index 2385cee294..6593ce359d 100644
--- a/yt_dlp/extractor/vmware.py
+++ b/yt_dlp/extractor/vmware.py
@@ -1,6 +1,12 @@
 import itertools
 
 from .common import InfoExtractor, SearchInfoExtractor
+from ..utils import (
+    float_or_none,
+    join_nonempty,
+    traverse_obj,
+    url_or_none,
+)
 
 
 class VMwareIE(InfoExtractor):
@@ -97,19 +103,19 @@ class VMwareSearchIE(SearchInfoExtractor):
     IE_NAME = 'VMware:search'
     _SEARCH_KEY = 'vmwaresearch'
     _TESTS = [{
-        'url': 'vmwaresearch10:*',
+        'url': 'vmwaresearch5:firewall',
         'info_dict': {
-            'id': '*',
-            'title': '*',
+            'id': 'firewall',
+            'title': 'firewall',
         },
-        'playlist_count': 10,
+        'playlist_count': 5,
     }, {
         'url': 'vmwaresearchall:uptime',
         'info_dict': {
             'id': 'uptime',
             'title': 'uptime',
         },
-        'playlist_mincount': 5,
+        'playlist_mincount': 2,
     }]
     _LIBRARY_MAP = {
         'explore': ('VMware Explore Video Library', 'https://www.vmware.com/explore/video-library/video/%s'),
@@ -117,27 +123,41 @@ class VMwareSearchIE(SearchInfoExtractor):
     }
 
     def _search_results(self, query):
-        def search_query(query, offset, limit, account):
+        def search_query(query, page_no, records_per_page, account):
             # search api:
-            # https://www.vmware.com/api/nocache/tools/brightcove/search?q=%2B{query}%20%2Byear:2023:2024%20%20-vod_on_demand_publish:%22False%22%2Bcomplete:%22true%22%2Bstate:%22ACTIVE%22&limit=12&offset=0&sort=-updated_at&account=explore
+            # https://api.swiftype.com/api/v1/public/engines/search.json?engine_key=J3yan3XpFywGvRxQMcEr&document_types[]=videos&&filters[videos][locale]=en-us&filters[videos][vod_on_demand_publish][]=!False&filters[videos][complete]=true&filters[videos][state]=ACTIVE&facets[videos][]=products&facets[videos][]=sessiontype&facets[videos][]=audience&facets[videos][]=track&facets[videos][]=level&filters[videos][year][]=!&filters[videos][account]=explore&q[]=ransomware&q[]=uptime&page=1&per_page=12&sort_field[videos]=updated_date&sort_direction[videos]=desc
             return self._download_json(
-                'https://www.vmware.com/api/nocache/tools/brightcove/search', query,
-                note=f'Searching videos in {self._LIBRARY_MAP[account][0]}', query={
-                    'q': f'+{query} -vod_on_demand_publish:"False"+complete:"true"+state:"ACTIVE"',
-                    'limit': limit,
-                    'offset': offset,
-                    'sort': 'updated_at',   # chronological ascending order. For descending order: '-updated_at'
-                    'account': account,
+                'https://api.swiftype.com/api/v1/public/engines/search.json', query,
+                note=f'Page {page_no}: Searching for videos in {self._LIBRARY_MAP[account][0]}', query={
+                    'engine_key': 'J3yan3XpFywGvRxQMcEr',
+                    'document_types[]': 'videos',
+                    'filters[videos][state]': 'ACTIVE',
+                    'filters[videos][account]': account,
+                    'q[]': query,
+                    'page': page_no,
+                    'per_page': records_per_page,
+                    'sort_field[videos]': 'video_id',
+                    'sort_direction[videos]': 'asc',    # 'desc' for descending order
                 })
 
         for account in ['explore', 'vmware']:
-            limit, total_count = 100, None      # limit: maximum 100
-            for i in itertools.count():
-                search_results = search_query(query, i * limit, limit, account)
-                total_count = search_results.get('count', 0)
-                for video in search_results.get('videos', []):
-                    if video_id := video.get('id'):
-                        yield self.url_result(self._LIBRARY_MAP[account][1] % video_id)
-                if (i + 1) * limit >= total_count:
+            records_per_page, total_count = 100, None   # records_per_page: maximum 100
+            for i in itertools.count(start=1, step=1):
+                search_results = search_query(query, i, records_per_page, account)
+                total_count = traverse_obj(
+                    search_results, ('info', 'videos', 'total_result_count', {int}), default=0)
+                for video in traverse_obj(search_results, ('records', 'videos', lambda _, v: v['external_id'])):
+                    yield self.url_result(self._LIBRARY_MAP[account][1] % video['external_id'],
+                        **traverse_obj(video, {
+                            'id': ('external_id', {str}),
+                            'title': ('name', {str}),
+                            'description': ({lambda v: join_nonempty('description', 'long_description',
+                                                                     from_dict=video, delim='\n')}),
+                            'thumbnail': (('images', 'thumbnail'), {url_or_none}),
+                            'tags': ('tags'),
+                            'uploader_id': ('account_id'),
+                            'duration': ('duration', {lambda v: float_or_none(v, 1000)}),
+                        }, get_all=False))
+                if i * records_per_page >= total_count:
                     self.to_screen(f'{query}: {total_count} video(s) found')
                     break

From 117b8293c52722a2c97aba6ac3315fe2fb84bab4 Mon Sep 17 00:00:00 2001
From: kclauhk <78251477+kclauhk@users.noreply.github.com>
Date: Thu, 6 Feb 2025 23:48:42 +0800
Subject: [PATCH 5/5] Fix indentation

---
 yt_dlp/extractor/vmware.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/vmware.py b/yt_dlp/extractor/vmware.py
index 6593ce359d..a42cbb0be5 100644
--- a/yt_dlp/extractor/vmware.py
+++ b/yt_dlp/extractor/vmware.py
@@ -147,7 +147,8 @@ def search_query(query, page_no, records_per_page, account):
                 total_count = traverse_obj(
                     search_results, ('info', 'videos', 'total_result_count', {int}), default=0)
                 for video in traverse_obj(search_results, ('records', 'videos', lambda _, v: v['external_id'])):
-                    yield self.url_result(self._LIBRARY_MAP[account][1] % video['external_id'],
+                    yield self.url_result(
+                        self._LIBRARY_MAP[account][1] % video['external_id'],
                         **traverse_obj(video, {
                             'id': ('external_id', {str}),
                             'title': ('name', {str}),