Merge remote-tracking branch 'upstream/master' into ie/anigamer

2025-03-09 12:50:23 -05:00 · 2024-10-30 18:27:57 +13:00 · 2024-10-30 18:27:57 +13:00 · 50b2820684
commit 50b2820684
parent ce031318fd 5bc5fb2835
3 changed files with 11 additions and 9 deletions
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -208,7 +208,6 @@ def sign(self, user, pw, clid):
    def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False):
        track_id = str(info['id'])
        title = info['title']
        format_urls = set()
        formats = []
@@ -367,7 +366,7 @@ def extract_count(key):
            'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
            'uploader_url': user.get('permalink_url'),
            'timestamp': unified_timestamp(info.get('created_at')),
-            'title': title,
+            'title': info.get('title'),
            'description': info.get('description'),
            'thumbnails': thumbnails,
            'duration': float_or_none(info.get('duration'), 1000),
@@ -377,7 +376,8 @@ def extract_count(key):
            'like_count': extract_count('favoritings') or extract_count('likes'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
-            'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
+            'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
            'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
            'formats': formats if not extract_flat else None,
        }
@@ -429,7 +429,6 @@ class SoundcloudIE(SoundcloudBaseIE):
                'repost_count': int,
                'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
                'uploader_url': 'https://soundcloud.com/ethmusic',
                'genres': [],
            },
        },
        # geo-restricted
@@ -453,6 +452,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'uploader_url': 'https://soundcloud.com/the-concept-band',
                'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
                'genres': ['Alternative'],
                'artists': ['The Royal Concept'],
            },
        },
        # private link
@@ -525,6 +525,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'repost_count': int,
                'view_count': int,
                'genres': ['Dance & EDM'],
                'artists': ['80M'],
            },
        },
        # private link, downloadable format
@@ -549,6 +550,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
                'uploader_url': 'https://soundcloud.com/oriuplift',
                'genres': ['Trance'],
                'artists': ['Ori Uplift'],
            },
        },
        # no album art, use avatar pic for thumbnail
@@ -572,7 +574,7 @@ class SoundcloudIE(SoundcloudBaseIE):
                'comment_count': int,
                'repost_count': int,
                'uploader_url': 'https://soundcloud.com/garyvee',
-                'genres': [],
+                'artists': ['MadReal'],
            },
            'params': {
                'skip_download': True,
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -5165,6 +5165,7 @@ class _UnsafeExtensionError(Exception):
        'ico',
        'image',
        'jng',
        'jpe',
        'jpeg',
        'jxl',
        'svg',
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -391,14 +391,13 @@ def find_element(*, tag: str, html=False): ...
 def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
    # deliberately using `id=` and `cls=` for ease of readability
    assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
-    if not tag:
+    ANY_TAG = r'[\w:.-]+'
-        tag = r'[\w:.-]+'
    if attr and value:
        assert not cls, 'Cannot match both attr and cls'
        assert not id, 'Cannot match both attr and id'
        func = get_element_html_by_attribute if html else get_element_by_attribute
-        return functools.partial(func, attr, value, tag=tag)
+        return functools.partial(func, attr, value, tag=tag or ANY_TAG)
    elif cls:
        assert not id, 'Cannot match both cls and id'
@@ -408,7 +407,7 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
    elif id:
        func = get_element_html_by_id if html else get_element_by_id
-        return functools.partial(func, id, tag=tag)
+        return functools.partial(func, id, tag=tag or ANY_TAG)
    index = int(bool(html))
    return lambda html: get_element_text_and_html_by_tag(tag, html)[index]