From 29278a3323be5106809e43d2977efcd0e3159a4f Mon Sep 17 00:00:00 2001
From: Marcel
Date: Sun, 27 Nov 2022 16:56:45 +0100
Subject: [PATCH] [parsing] fix return value of get_elements_text_and_html_by_class
---
test/test_parsing.py | 16 +++++++++++++---
yt_dlp/parsing.py | 4 ++--
2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/test/test_parsing.py b/test/test_parsing.py
index 588711518..e21299df0 100644
--- a/test/test_parsing.py
+++ b/test/test_parsing.py
@@ -195,7 +195,7 @@ class StrictParser(MatchingElementParser):
ignore
'''
items = get_elements_text_and_html_by_tag('img', test_string)
- self.assertListEqual(items, [('', '
'), ('', '
')])
+ self.assertEqual(items, [('', '
'), ('', '
')])
self.assertEqual(
StrictParser.get_element_text_and_html_by_tag('use', ''),
@@ -245,16 +245,26 @@ def test_relaxed_html_parsing(self):
parser = HTMLTagParser()
self.assertEqual(parser.taglist('
', reset=True), [])
- self.assertEqual(parser.taglist('', reset=True), [Tag('div'), Tag('p')])
+
+ tags = parser.taglist('
', reset=True)
+ self.assertEqual(tags, [Tag('div'), Tag('p')])
+ self.assertEqual(tags[0].text_and_html(), ('', '
'))
+ self.assertEqual(tags[1].text_and_html(), ('', '
'))
tags = parser.taglist('
', reset=True)
self.assertEqual(tags, [Tag('div'), Tag('p')])
+ self.assertEqual(tags[0].text_and_html(), ('
', '
'))
+ self.assertEqual(tags[1].text_and_html(), ('
', '
'))
tags = parser.taglist('
', reset=True)
self.assertEqual(tags, [Tag('div'), Tag('p')])
+ self.assertEqual(tags[0].text_and_html(), ('
/p>', '
'))
+ self.assertEqual(tags[1].text_and_html(), ('', '
'))
tags = parser.taglist('
', reset=True)
self.assertEqual(tags, [Tag('div'), Tag('p')])
+ self.assertEqual(tags[0].text_and_html(),
+ ('
paragraph
', '
'))
self.assertEqual(tags[1].text_and_html(), ('paragraph', '
paragraph
'))
tags = parser.taglist('
![]()
must be empty', reset=True)
@@ -315,7 +325,7 @@ def test_tag_return_order(self):
[Tag('t5'), Tag('t6')]],
[Tag('t7'), Tag('t8')]]))
- def test_within_html_comment(self):
+ def test_html_comment_ranges(self):
def mark_comments(_string, char='^', nochar='-'):
cmts = HTMLCommentRanges(_string)
return "".join(char if _idx in cmts else nochar for _idx in range(len(_string)))
diff --git a/yt_dlp/parsing.py b/yt_dlp/parsing.py
index d2c260954..8751cd5f9 100644
--- a/yt_dlp/parsing.py
+++ b/yt_dlp/parsing.py
@@ -318,7 +318,7 @@ def get_elements_text_and_html_by_attribute(cls, *args, **kwargs):
@classmethod
def get_elements_by_attribute(cls, *args, **kwargs):
- return [tag.text_and_html()[0] for tag in cls.tags_by_attribute(*args, **kwargs)]
+ return [tag.text() for tag in cls.tags_by_attribute(*args, **kwargs)]
@classmethod
def get_elements_html_by_attribute(cls, *args, **kwargs):
@@ -349,7 +349,7 @@ def get_elements_html_by_class(cls, class_name, html):
@classmethod
def get_elements_text_and_html_by_class(cls, class_name, html):
value = cls.class_value_regex(class_name)
- return [tag.text() for tag
+ return [tag.text_and_html() for tag
in cls.tags_by_attribute('class', value, html, escape_value=False)]
@classmethod