[youtube] Improve URL to extractor routing (closes #27572, closes #28335, closes #28742)

pull/28805/head
Sergey M․ 2021-04-17 00:07:32 +07:00
parent 4fb25ff5a3
commit 06159135ef
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed files with 23 additions and 12 deletions

View File

@ -46,6 +46,10 @@ from ..utils import (
) )
def parse_qs(url):
return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
class YoutubeBaseInfoExtractor(InfoExtractor): class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors""" """Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@ -413,16 +417,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
) )
)? # all until now is optional -> you can pass the naked ID )? # all until now is optional -> you can pass the naked ID
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?!.*?\blist=
(?:
%(playlist_id)s| # combined list/video URLs are handled by the playlist IE
WL # WL are handled by the watch later IE
)
)
(?(1).+)? # if we found the ID, everything can follow (?(1).+)? # if we found the ID, everything can follow
$""" % { $""" % {
'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
@ -1208,6 +1205,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
} }
@classmethod
def suitable(cls, url):
qs = parse_qs(url)
if qs.get('list', [None])[0]:
return False
return super(YoutubeIE, cls).suitable(url)
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs) super(YoutubeIE, self).__init__(*args, **kwargs)
self._code_cache = {} self._code_cache = {}
@ -2275,6 +2279,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'title': '#cctv9', 'title': '#cctv9',
}, },
'playlist_mincount': 350, 'playlist_mincount': 350,
}, {
'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
'only_matching': True,
}] }]
@classmethod @classmethod
@ -2764,7 +2771,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
url = compat_urlparse.urlunparse( url = compat_urlparse.urlunparse(
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com')) compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
# Handle both video/playlist URLs # Handle both video/playlist URLs
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) qs = parse_qs(url)
video_id = qs.get('v', [None])[0] video_id = qs.get('v', [None])[0]
playlist_id = qs.get('list', [None])[0] playlist_id = qs.get('list', [None])[0]
if video_id and playlist_id: if video_id and playlist_id:
@ -2860,12 +2867,16 @@ class YoutubePlaylistIE(InfoExtractor):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if YoutubeTabIE.suitable(url) else super( if YoutubeTabIE.suitable(url):
YoutubePlaylistIE, cls).suitable(url) return False
qs = parse_qs(url)
if qs.get('v', [None])[0]:
return False
return super(YoutubePlaylistIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) qs = parse_qs(url)
if not qs: if not qs:
qs = {'list': playlist_id} qs = {'list': playlist_id}
return self.url_result( return self.url_result(