From d9195b5133946d53f13434849433c4760243b040 Mon Sep 17 00:00:00 2001
From: Allen <64094914+allendema@users.noreply.github.com>
Date: Wed, 4 Dec 2024 23:10:45 +0100
Subject: [PATCH 1/3] [core] utils: fix get_element_by_*
---
test/test_utils.py | 20 ++++++++++++++++++++
yt_dlp/utils/_utils.py | 2 +-
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/test/test_utils.py b/test/test_utils.py
index b3de14198..2e26d224a 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1769,6 +1769,10 @@ def test_get_element_html_by_class(self):
this should be returned
-
this should also be returned
+
this should also be returned
this should also be returned
@@ -1859,10 +1859,6 @@ def test_get_elements_text_and_html_by_attribute(self):
GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
- GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE = '''
-
nice
- '''
-
def test_get_element_text_and_html_by_tag(self):
html = self.GET_ELEMENT_BY_TAG_TEST_STRING
@@ -1872,14 +1868,11 @@ def test_get_element_text_and_html_by_tag(self):
self.assertEqual(
get_element_text_and_html_by_tag('span', html),
(self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
-
- self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
-
- html = self.GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE
-
self.assertEqual(
get_element_text_and_html_by_tag('SPAN', html),
- ('nice', html.strip()), html)
+ (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
+
+ self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
def test_iri_to_uri(self):
self.assertEqual(
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 9aae60584..d600a522a 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -430,10 +430,14 @@ def get_element_text_and_html_by_tag(tag, html):
return its' content (text) and the whole element (html)
"""
def find_or_raise(haystack, needle, exc):
- try:
+ with contextlib.suppress(ValueError):
return haystack.index(needle)
- except ValueError:
- raise exc
+
+ with contextlib.suppress(ValueError):
+ return haystack.index(needle.upper())
+
+ raise exc
+
closing_tag = f'{tag}>'
whole_start = find_or_raise(
html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))