[YouTube] Support Releases tab

pull/32188/head
dirkf 2023-04-23 22:58:35 +01:00
parent 211cbfd5d4
commit 64d6dd64c8
2 changed files with 74 additions and 49 deletions

View File

@ -31,6 +31,7 @@ from ..utils import (
extract_attributes, extract_attributes,
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
LazyList, LazyList,
merge_dicts, merge_dicts,
@ -45,6 +46,7 @@ from ..utils import (
str_to_int, str_to_int,
traverse_obj, traverse_obj,
try_get, try_get,
txt_or_none,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unsmuggle_url, unsmuggle_url,
@ -2608,6 +2610,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'uploader_id': '@lexwill718', 'uploader_id': '@lexwill718',
}, },
'playlist_mincount': 75, 'playlist_mincount': 75,
}, {
# Releases tab
'url': 'https://www.youtube.com/@daftpunk/releases',
'info_dict': {
'id': 'UC_kRDKYrUlrbtrSiyu5Tflg',
'title': 'Daft Punk - Releases',
'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel',
'uploader_id': '@daftpunk',
'uploader': 'Daft Punk',
},
'playlist_mincount': 36,
}, { }, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
@ -2822,6 +2835,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
continue continue
return renderer return renderer
@staticmethod
def _get_text(r, k):
    """Look up the display text stored under key `k` of renderer `r`.

    Two alternative paths are handed to traverse_obj(), in this order:
      1. r[k]['runs'][0]['text']
      2. r[k]['simpleText']
    The match is passed through txt_or_none via `expected_type`.

    NOTE(review): this relies on traverse_obj() trying the paths in
    order and returning None when neither yields a value accepted by
    `expected_type` -- confirm against the traverse_obj() docstring.
    """
    candidate_paths = (
        (k, 'runs', 0, 'text'),
        (k, 'simpleText'),
    )
    return traverse_obj(r, *candidate_paths, expected_type=txt_or_none)
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in grid_renderer['items']:
if not isinstance(item, dict): if not isinstance(item, dict):
@ -2829,9 +2848,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
renderer = self._extract_grid_item_renderer(item) renderer = self._extract_grid_item_renderer(item)
if not isinstance(renderer, dict): if not isinstance(renderer, dict):
continue continue
title = try_get( title = self._get_text(renderer, 'title')
renderer, (lambda x: x['title']['runs'][0]['text'],
lambda x: x['title']['simpleText']), compat_str)
# playlist # playlist
playlist_id = renderer.get('playlistId') playlist_id = renderer.get('playlistId')
if playlist_id: if playlist_id:
@ -2848,8 +2865,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# channel # channel
channel_id = renderer.get('channelId') channel_id = renderer.get('channelId')
if channel_id: if channel_id:
title = try_get( title = self._get_text(renderer, 'title')
renderer, lambda x: x['title']['simpleText'], compat_str)
yield self.url_result( yield self.url_result(
'https://www.youtube.com/channel/%s' % channel_id, 'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title) ie=YoutubeTabIE.ie_key(), video_title=title)
@ -2958,15 +2974,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
def _rich_grid_entries(self, contents): def _rich_grid_entries(self, contents):
for content in contents: for content in contents:
video_renderer = try_get( content = traverse_obj(
content, content, ('richItemRenderer', 'content'),
(lambda x: x['richItemRenderer']['content']['videoRenderer'], expected_type=dict) or {}
lambda x: x['richItemRenderer']['content']['reelItemRenderer']), video_renderer = traverse_obj(
dict) content, 'videoRenderer', 'reelItemRenderer',
expected_type=dict)
if video_renderer: if video_renderer:
entry = self._video_entry(video_renderer) entry = self._video_entry(video_renderer)
if entry: if entry:
yield entry yield entry
# playlist
renderer = traverse_obj(
content, 'playlistRenderer', expected_type=dict) or {}
title = self._get_text(renderer, 'title')
playlist_id = renderer.get('playlistId')
if playlist_id:
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id,
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
@staticmethod @staticmethod
def _build_continuation_query(continuation, ctp=None): def _build_continuation_query(continuation, ctp=None):
@ -3071,6 +3098,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
return return
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []): for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
yield entry yield entry
continuation = self._extract_continuation(rich_grid_renderer) continuation = self._extract_continuation(rich_grid_renderer)
ytcfg = self._extract_ytcfg(item_id, webpage) ytcfg = self._extract_ytcfg(item_id, webpage)
@ -3213,50 +3241,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
uploader['channel'] = uploader['uploader'] uploader['channel'] = uploader['uploader']
return uploader return uploader
@staticmethod @classmethod
def _extract_alert(data): def _extract_alert(cls, data):
alerts = [] alerts = []
for alert in try_get(data, lambda x: x['alerts'], list) or []: for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict):
if not isinstance(alert, dict): alert_text = traverse_obj(
continue alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)
alert_text = try_get(
alert, lambda x: x['alertRenderer']['text'], dict)
if not alert_text: if not alert_text:
continue continue
text = try_get( text = cls._get_text(alert_text, 'text')
alert_text,
(lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
compat_str)
if text: if text:
alerts.append(text) alerts.append(text)
return '\n'.join(alerts) return '\n'.join(alerts)
def _extract_from_tabs(self, item_id, webpage, data, tabs): def _extract_from_tabs(self, item_id, webpage, data, tabs):
selected_tab = self._extract_selected_tab(tabs) selected_tab = self._extract_selected_tab(tabs)
renderer = try_get( renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'),
data, lambda x: x['metadata']['channelMetadataRenderer'], dict) expected_type=dict) or {}
playlist_id = item_id playlist_id = item_id
title = description = None title = description = None
if renderer: if renderer:
channel_title = renderer.get('title') or item_id channel_title = txt_or_none(renderer.get('title')) or item_id
tab_title = selected_tab.get('title') tab_title = txt_or_none(selected_tab.get('title'))
title = channel_title or item_id title = join_nonempty(
if tab_title: channel_title or item_id, tab_title,
title += ' - %s' % tab_title txt_or_none(selected_tab.get('expandedText')),
if selected_tab.get('expandedText'): delim=' - ')
title += ' - %s' % selected_tab['expandedText'] description = txt_or_none(renderer.get('description'))
description = renderer.get('description') playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id
playlist_id = renderer.get('externalId')
else: else:
renderer = try_get( renderer = traverse_obj(data,
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) ('metadata', 'playlistMetadataRenderer'),
if renderer: ('header', 'hashtagHeaderRenderer'),
title = renderer.get('title') expected_type=dict) or {}
else: title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'),
renderer = try_get( expected_type=txt_or_none)
data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
if renderer:
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
playlist = self.playlist_result( playlist = self.playlist_result(
self._entries(selected_tab, item_id, webpage), self._entries(selected_tab, item_id, webpage),
playlist_id=playlist_id, playlist_title=title, playlist_id=playlist_id, playlist_title=title,
@ -3264,15 +3283,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
return merge_dicts(playlist, self._extract_uploader(renderer, data)) return merge_dicts(playlist, self._extract_uploader(renderer, data))
def _extract_from_playlist(self, item_id, url, data, playlist): def _extract_from_playlist(self, item_id, url, data, playlist):
title = playlist.get('title') or try_get( title = traverse_obj((playlist, data),
data, lambda x: x['titleText']['simpleText'], compat_str) (0, 'title'), (1, 'titleText', 'simpleText'),
playlist_id = playlist.get('playlistId') or item_id expected_type=txt_or_none)
playlist_id = txt_or_none(playlist.get('playlistId')) or item_id
# Inline playlist rendition continuation does not always work # Inline playlist rendition continuation does not always work
# at Youtube side, so delegating regular tab-based playlist URL # at Youtube side, so delegating regular tab-based playlist URL
# processing whenever possible. # processing whenever possible.
playlist_url = urljoin(url, try_get( playlist_url = urljoin(url, traverse_obj(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
compat_str)) expected_type=url_or_none))
if playlist_url and playlist_url != url: if playlist_url and playlist_url != url:
return self.url_result( return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

View File

@ -3753,6 +3753,11 @@ def strip_or_none(v, default=None):
return v.strip() if isinstance(v, compat_str) else default return v.strip() if isinstance(v, compat_str) else default
def txt_or_none(v, default=None):
    """Coerce `v` to stripped text, treating blank results as missing.

    Combines a string conversion with strip_or_none-like behaviour, and
    is intended for use as an `expected_type` with traverse_obj.

    Returns `default` when `v` is None, or when the text form of `v` is
    empty after stripping surrounding whitespace; otherwise returns the
    stripped text.
    """
    if v is None:
        return default
    text = compat_str(v).strip()
    # An empty string is falsy, so blank text falls back to `default`
    return text or default
def url_or_none(url): def url_or_none(url):
if not url or not isinstance(url, compat_str): if not url or not isinstance(url, compat_str):
return None return None
@ -4096,8 +4101,8 @@ def escape_url(url):
).geturl() ).geturl()
def parse_qs(url): def parse_qs(url, **kwargs):
return compat_parse_qs(compat_urllib_parse.urlparse(url).query) return compat_parse_qs(compat_urllib_parse.urlparse(url).query, **kwargs)
def read_batch_urls(batch_fd): def read_batch_urls(batch_fd):