[swrmediathek] Extract direct links from JSON and add support for audio files

2014-05-23 21:04:21 +07:00 · 2014-05-23 21:04:21 +07:00 · 7f739999e9
parent 0f8a01d4f3
commit 7f739999e9
1 changed file with 63 additions and 33 deletions
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -4,71 +4,101 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import parse_duration
 class SWRMediathekIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<videoid>[^?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
        'md5': '8c5f6f0172753368547ca8413a7768ac',
        'info_dict': {
            'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'SWR odysso',
            'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
            'thumbnail': 're:^http:.*\.jpg$',
-        },
+            'duration': 2602,
-        'params': {
+            'upload_date': '20140515',
-            'skip_download': True,  # requires rtmpdump
+            'uploader': 'SWR Fernsehen',
            'uploader_id': '990030',
        },
    }, {
        'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
        'md5': 'b10ab854f912eecc5a6b55cd6fc1f545',
        'info_dict': {
            'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
            'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
            'thumbnail': 're:http://.*\.jpg',
            'duration': 5305,
            'upload_date': '20140516',
            'uploader': 'SWR Fernsehen',
            'uploader_id': '990030',
        },
-        'params': {
+    }, {
-            'skip_download': True,  # requires rtmpdump
+        'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
-        },
+        'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
        'info_dict': {
            'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6',
            'ext': 'mp3',
            'title': 'Saša Stanišic: Vor dem Fest',
            'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
            'thumbnail': 're:http://.*\.jpg',
            'duration': 3366,
            'upload_date': '20140520',
            'uploader': 'SWR 2',
            'uploader_id': '284670',
        }
    }]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
+        video = self._download_json(
            'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON')
-        smilurl = 'http://swrmediathek.de/rtmpQuals/%s/clips.smil'
+        attr = video['attr']
-        smildoc = self._download_xml(smilurl % video_id, video_id, 'Downloading SMIL page')
+        media_type = attr['entry_etype']
        baseurl = smildoc.find('.//meta').attrib['base']
        formats = []
-        for video in smildoc.findall('.//video'):
+        for entry in video['sub']:
-            vbr = video.attrib.get('system-bitrate')
+            if entry['name'] != 'entry_media':
-            if vbr:
+                continue
                vbr = int(vbr) / 1000
-            formats.append({
+            entry_attr = entry['attr']
-                'format_id': video.attrib['height'] + 'p',
+            codec = entry_attr['val0']
-                'width': int_or_none(video.attrib['width']),
+            quality = int(entry_attr['val1'])
-                'height': int_or_none(video.attrib['height']),
+
-                'vbr': vbr,
+            fmt = {
-                'url': baseurl,
+                'url': entry_attr['val2'],
-                'play_path': 'mp4:' + video.attrib['src'],
+                'quality': quality,
-                'ext': 'flv',
+            }
-            })
+
            if media_type == 'Video':
                fmt.update({
                    'format_note': ['144p', '288p', '544p'][quality-1],
                    'vcodec': codec,
                })
            elif media_type == 'Audio':
                fmt.update({
                    'acodec': codec,
                })
            formats.append(fmt)
        self._sort_formats(formats)
        return {
            'id': video_id,
-            'title': self._html_search_meta('title', webpage, 'title', fatal=True),
+            'title': attr['entry_title'],
-            'thumbnail': self._search_regex(r'<link rel="image_src".+href="(.+)" />', webpage, 'thumbnail'),
+            'description': attr['entry_descl'],
            'thumbnail': attr['entry_image_16_9'],
            'duration': parse_duration(attr['entry_durat']),
            'upload_date': attr['entry_pdatet'][:-4],
            'uploader': attr['channel_title'],
            'uploader_id': attr['channel_idkey'],
            'formats': formats,
            'description': self._html_search_meta('description', webpage, 'description'),
        }