Support Series page

2022-04-07 23:08:42 +01:00 · 2022-04-07 23:08:42 +01:00 · 65712d99c4
parent 8f6a09b921
commit 65712d99c4
1 changed file with 23 additions and 2 deletions
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -9,7 +9,9 @@ import sys
 from .common import InfoExtractor
 from ..compat import (
    compat_b64decode,
+    compat_parse_qs,
    compat_struct_unpack,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    determine_ext,
@@ -25,8 +27,8 @@ _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(ch


 class RTVEALaCartaIE(InfoExtractor):
-    IE_NAME = 'rtve.es:alacarta'
-    IE_DESC = 'RTVE a la carta'
+    IE_NAME = 'rtve.es:play'
+    IE_DESC = 'RTVE Play'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?((alacarta|playz?)/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'

    _TESTS = [{
@@ -90,6 +92,12 @@ class RTVEALaCartaIE(InfoExtractor):
    }, {
        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
        'only_matching': True,
+    }, {
+        'url': 'https://www.rtve.es/play/videos/modulos/capitulos/11332/?currentpage=pf_serie',
+        'info_dict': {
+            'id': '11332',
+        },
+        'playlist_mincount': 20,
    }]

    def _real_initialize(self):
@@ -164,8 +172,21 @@ class RTVEALaCartaIE(InfoExtractor):
        self._sort_formats(formats)
        return formats

+    def _extract_playlist(self, url, playlist_id):
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        matches = re.findall(r'''<a\b[^>]*\bhref\s*=\s*["'](%s)''' % (self._VALID_URL, ), webpage)
+
+        return self.playlist_from_matches(matches, playlist_id=playlist_id, getter=lambda x: x[0], ie=self.ie_key())
+
    def _real_extract(self, url):
        video_id = self._match_id(url)
+
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        if 'pf_serie' == qs.get('currentpage', [None])[-1]:
+            return self._extract_playlist(url, video_id)
+
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]