mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
[parsing] return unclosed matched tags
This commit is contained in:
parent
8451074b50
commit
dbf350c122
2 changed files with 7 additions and 6 deletions
|
@@ -218,8 +218,9 @@ def test_get_element_text_and_html_by_tag_malformed(self):
|
|||
get_element_text_and_html_by_tag('malnested_b', html),
|
||||
(f'{inner_text}</malnested_a>',
|
||||
f'<malnested_b>{inner_text}</malnested_a></malnested_b>'))
|
||||
+ self.assertEqual(
|
||||
+ get_element_text_and_html_by_tag('orphan', f'<orphan>{html}'), ('', '<orphan>'))
|
||||
self.assertIsNone(get_element_text_and_html_by_tag('orphan', f'{html}</orphan>'))
|
||||
- self.assertIsNone(get_element_text_and_html_by_tag('orphan', f'<orphan>{html}'))
|
||||
|
||||
def test_strict_html_parsing(self):
|
||||
class StrictTagParser(HTMLTagParser):
|
||||
|
@@ -244,13 +245,13 @@ def test_relaxed_html_parsing(self):
|
|||
parser = HTMLTagParser()
|
||||
|
||||
self.assertEqual(parser.taglist('</p>', reset=True), [])
|
||||
- self.assertEqual(parser.taglist('<div><p>', reset=True), [])
|
||||
+ self.assertEqual(parser.taglist('<div><p>', reset=True), [Tag('div'), Tag('p')])
|
||||
|
||||
tags = parser.taglist('<div><p></div></p>', reset=True)
|
||||
- self.assertEqual(tags, [Tag('p'), Tag('div')])
|
||||
+ self.assertEqual(tags, [Tag('div'), Tag('p')])
|
||||
|
||||
tags = parser.taglist('<div><p>/p></div>', reset=True)
|
||||
- self.assertEqual(tags, [Tag('div')])
|
||||
+ self.assertEqual(tags, [Tag('div'), Tag('p')])
|
||||
|
||||
tags = parser.taglist('<div><p>paragraph</p<ignored></div>', reset=True)
|
||||
self.assertEqual(tags, [Tag('div'), Tag('p')])
|
||||
|
|
|
@@ -190,7 +190,7 @@ def handle_starttag(self, tag, attrs):
|
|||
tag_obj = self.Tag(tag, string=self.rawdata, attrs=attrs)
|
||||
tag_obj.openrange(self._offset, len(tag_text))
|
||||
if tag_is_open:
|
||||
- nesting = []
|
||||
+ nesting = [tag_obj]
|
||||
self._nestedtags[-1].append(nesting)
|
||||
self._nestedtags.append(nesting)
|
||||
else:
|
||||
|
@@ -218,7 +218,7 @@ def handle_endtag(self, tag):
|
|||
if isinstance(tag_obj, self.Tag):
|
||||
close_idx = self.rawdata.find('>', self._offset) + 1
|
||||
tag_obj.closerange(self._offset, close_idx - self._offset)
|
||||
- self._nestedtags.pop().insert(0, tag_obj)
|
||||
+ self._nestedtags.pop()
|
||||
self.callback(tag_obj)
|
||||
except ValueError as exc:
|
||||
if isinstance(exc, compat_HTMLParseError):
|
||||
|
|
Loading…
Reference in a new issue