[youtube] Improve URL to extractor routing (closes #27572, closes #28335, closes #28742)

2021-04-17 00:07:32 +07:00 · 2021-04-17 00:07:32 +07:00 · 06159135ef
parent 4fb25ff5a3
commit 06159135ef
1 changed files with 23 additions and 12 deletions
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -46,6 +46,10 @@ from ..utils import (
 )


+def parse_qs(url):
+    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+
+
 class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@ -413,16 +417,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
-                     (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
-                     (?!.*?\blist=
-                        (?:
-                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
-                            WL                                                # WL are handled by the watch later IE
-                        )
-                     )
+                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     $""" % {
-        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    _PLAYER_INFO_RE = (
@ -1208,6 +1205,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }

+    @classmethod
+    def suitable(cls, url):
+        qs = parse_qs(url)
+        if qs.get('list', [None])[0]:
+            return False
+        return super(YoutubeIE, cls).suitable(url)
+
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        self._code_cache = {}
@ -2275,6 +2279,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'title': '#cctv9',
        },
        'playlist_mincount': 350,
+    }, {
+        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
+        'only_matching': True,
    }]

    @classmethod
@ -2764,7 +2771,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
@ -2860,12 +2867,16 @@ class YoutubePlaylistIE(InfoExtractor):

    @classmethod
    def suitable(cls, url):
-        return False if YoutubeTabIE.suitable(url) else super(
-            YoutubePlaylistIE, cls).suitable(url)
+        if YoutubeTabIE.suitable(url):
+            return False
+        qs = parse_qs(url)
+        if qs.get('v', [None])[0]:
+            return False
+        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        qs = parse_qs(url)
        if not qs:
            qs = {'list': playlist_id}
        return self.url_result(