From 29278a3323be5106809e43d2977efcd0e3159a4f Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 27 Nov 2022 16:56:45 +0100 Subject: [PATCH] [parsing] fix return value --- test/test_parsing.py | 16 +++++++++++++--- yt_dlp/parsing.py | 4 ++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/test/test_parsing.py b/test/test_parsing.py index 588711518..e21299df0 100644 --- a/test/test_parsing.py +++ b/test/test_parsing.py @@ -195,7 +195,7 @@ class StrictParser(MatchingElementParser): ignore ''' items = get_elements_text_and_html_by_tag('img', test_string) - self.assertListEqual(items, [('', ''), ('', '')]) + self.assertEqual(items, [('', ''), ('', '')]) self.assertEqual( StrictParser.get_element_text_and_html_by_tag('use', ''), @@ -245,16 +245,26 @@ def test_relaxed_html_parsing(self): parser = HTMLTagParser() self.assertEqual(parser.taglist('

', reset=True), []) - self.assertEqual(parser.taglist('

', reset=True), [Tag('div'), Tag('p')]) + + tags = parser.taglist('

', reset=True) + self.assertEqual(tags, [Tag('div'), Tag('p')]) + self.assertEqual(tags[0].text_and_html(), ('', '

')) + self.assertEqual(tags[1].text_and_html(), ('', '

')) tags = parser.taglist('

', reset=True) self.assertEqual(tags, [Tag('div'), Tag('p')]) + self.assertEqual(tags[0].text_and_html(), ('

', '

')) + self.assertEqual(tags[1].text_and_html(), ('
', '

')) tags = parser.taglist('

/p>

', reset=True) self.assertEqual(tags, [Tag('div'), Tag('p')]) + self.assertEqual(tags[0].text_and_html(), ('

/p>', '

/p>

')) + self.assertEqual(tags[1].text_and_html(), ('', '

')) tags = parser.taglist('

paragraph

', reset=True) self.assertEqual(tags, [Tag('div'), Tag('p')]) + self.assertEqual(tags[0].text_and_html(), + ('

paragraph', '

paragraph

')) self.assertEqual(tags[1].text_and_html(), ('paragraph', '

paragraph')) tags = parser.taglist('must be empty', reset=True) @@ -315,7 +325,7 @@ def test_tag_return_order(self): [Tag('t5'), Tag('t6')]], [Tag('t7'), Tag('t8')]])) - def test_within_html_comment(self): + def test_html_comment_ranges(self): def mark_comments(_string, char='^', nochar='-'): cmts = HTMLCommentRanges(_string) return "".join(char if _idx in cmts else nochar for _idx in range(len(_string))) diff --git a/yt_dlp/parsing.py b/yt_dlp/parsing.py index d2c260954..8751cd5f9 100644 --- a/yt_dlp/parsing.py +++ b/yt_dlp/parsing.py @@ -318,7 +318,7 @@ def get_elements_text_and_html_by_attribute(cls, *args, **kwargs): @classmethod def get_elements_by_attribute(cls, *args, **kwargs): - return [tag.text_and_html()[0] for tag in cls.tags_by_attribute(*args, **kwargs)] + return [tag.text() for tag in cls.tags_by_attribute(*args, **kwargs)] @classmethod def get_elements_html_by_attribute(cls, *args, **kwargs): @@ -349,7 +349,7 @@ def get_elements_html_by_class(cls, class_name, html): @classmethod def get_elements_text_and_html_by_class(cls, class_name, html): value = cls.class_value_regex(class_name) - return [tag.text() for tag + return [tag.text_and_html() for tag in cls.tags_by_attribute('class', value, html, escape_value=False)] @classmethod