From d9195b5133946d53f13434849433c4760243b040 Mon Sep 17 00:00:00 2001
From: Allen <64094914+allendema@users.noreply.github.com>
Date: Wed, 4 Dec 2024 23:10:45 +0100
Subject: [PATCH 1/3] [core] utils: fix get_element_by_*

---
 test/test_utils.py     | 20 ++++++++++++++++++++
 yt_dlp/utils/_utils.py |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/test/test_utils.py b/test/test_utils.py
index b3de14198..2e26d224a 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1769,6 +1769,10 @@ def test_get_element_html_by_class(self):
         <div itemprop="author" itemscope>foo</div>
     '''
 
+    GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING_UPPERCASE = '''
+        <DIV itemprop="author" itemscope>foo</DIV>
+    '''
+
     def test_get_element_by_attribute(self):
         html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
 
@@ -1780,6 +1784,10 @@ def test_get_element_by_attribute(self):
 
         self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
 
+        html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING_UPPERCASE
+
+        self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
+
     def test_get_element_html_by_attribute(self):
         html = self.GET_ELEMENT_BY_CLASS_TEST_STRING
 
@@ -1851,6 +1859,11 @@ def test_get_elements_text_and_html_by_attribute(self):
     GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
     GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
 
+
+    GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE = '''
+        <SPAN id="foo">nice</SPAN>
+    '''
+
     def test_get_element_text_and_html_by_tag(self):
         html = self.GET_ELEMENT_BY_TAG_TEST_STRING
 
@@ -1860,8 +1873,15 @@ def test_get_element_text_and_html_by_tag(self):
         self.assertEqual(
             get_element_text_and_html_by_tag('span', html),
             (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
+
         self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
 
+        html = self.GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE
+
+        self.assertEqual(
+            get_element_text_and_html_by_tag('SPAN', html),
+            ('nice', html.strip()), html)
+
     def test_iri_to_uri(self):
         self.assertEqual(
             iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 699bf1e7f..9aae60584 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -442,7 +442,7 @@ def find_or_raise(haystack, needle, exc):
     content_start += whole_start + 1
     with HTMLBreakOnClosingTagParser() as parser:
         parser.feed(html[whole_start:content_start])
-        if not parser.tagstack or parser.tagstack[0] != tag:
+        if not parser.tagstack or parser.tagstack[0] != tag.lower():
             raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
         offset = content_start
         while offset < len(html):

From ec3a0927c4c9c10c062e0953dd8fd1a6df2c0263 Mon Sep 17 00:00:00 2001
From: Allen <64094914+allendema@users.noreply.github.com>
Date: Wed, 4 Dec 2024 23:30:05 +0100
Subject: [PATCH 2/3] fixup! [core] utils: fix get_element_by_*

---
 test/test_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 2e26d224a..e8ef15e01 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1859,7 +1859,6 @@ def test_get_elements_text_and_html_by_attribute(self):
     GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
     GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
 
-
     GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE = '''
         <SPAN id="foo">nice</SPAN>
     '''

From c49aa772cc0ae6f8cb92c56315bbf01b45c61a8e Mon Sep 17 00:00:00 2001
From: Allen <64094914+allendema@users.noreply.github.com>
Date: Thu, 5 Dec 2024 00:43:15 +0100
Subject: [PATCH 3/3] [core] utils: retry tag lookup with upper-cased needle

---
 test/test_utils.py     | 15 ++++-----------
 yt_dlp/utils/_utils.py | 10 +++++++---
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index e8ef15e01..896cf9f32 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1846,7 +1846,7 @@ def test_get_elements_text_and_html_by_attribute(self):
     random text lorem ipsum</p>
     <div>
         this should be returned
-        <span>this should also be returned</span>
+        <SPAN>this should also be returned</SPAN>
         <div>
             this should also be returned
         </div>
@@ -1859,10 +1859,6 @@ def test_get_elements_text_and_html_by_attribute(self):
     GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119]
     GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7]
 
-    GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE = '''
-        <SPAN id="foo">nice</SPAN>
-    '''
-
     def test_get_element_text_and_html_by_tag(self):
         html = self.GET_ELEMENT_BY_TAG_TEST_STRING
 
@@ -1872,14 +1868,11 @@ def test_get_element_text_and_html_by_tag(self):
         self.assertEqual(
             get_element_text_and_html_by_tag('span', html),
             (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
-
-        self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
-
-        html = self.GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE
-
         self.assertEqual(
             get_element_text_and_html_by_tag('SPAN', html),
-            ('nice', html.strip()), html)
+            (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML))
+
+        self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html)
 
     def test_iri_to_uri(self):
         self.assertEqual(
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 9aae60584..d600a522a 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -430,10 +430,14 @@ def get_element_text_and_html_by_tag(tag, html):
     return its' content (text) and the whole element (html)
     """
     def find_or_raise(haystack, needle, exc):
-        try:
+        with contextlib.suppress(ValueError):
             return haystack.index(needle)
-        except ValueError:
-            raise exc
+
+        with contextlib.suppress(ValueError):
+            return haystack.index(needle.upper())
+
+        raise exc
+
     closing_tag = f'</{tag}>'
     whole_start = find_or_raise(
         html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))