[myspass] fix video URL extraction and improve metadata extraction (closes #22448)

pull/23000/head
Remita Amine 2019-11-04 20:05:27 +01:00
parent 2349255abd
commit 3e49083604
1 changed file with 29 additions and 46 deletions

View File

@@ -1,73 +1,56 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import os.path
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_str
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, int_or_none,
parse_duration,
xpath_text,
) )
class MySpassIE(InfoExtractor): class MySpassIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' _VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
'md5': '0b49f4844a068f8b33f4b7c88405862b', 'md5': '0b49f4844a068f8b33f4b7c88405862b',
'info_dict': { 'info_dict': {
'id': '11741', 'id': '11741',
'ext': 'mp4', 'ext': 'mp4',
'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?', 'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2', 'title': '17.02.2013 - Die Highlights, Teil 2',
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' video_id = self._match_id(url)
# video id is the last path element of the URL
# usually there is a trailing slash, so also try the second but last
url_path = compat_urllib_parse_urlparse(url).path
url_parent_path, video_id = os.path.split(url_path)
if not video_id:
_, video_id = os.path.split(url_parent_path)
# get metadata
metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata = self._download_xml( metadata = self._download_xml(
metadata_url, video_id, transform_source=lambda s: s.strip()) 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
video_id)
# extract values from metadata title = xpath_text(metadata, 'title', fatal=True)
url_flv_el = metadata.find('url_flv') video_url = xpath_text(metadata, 'url_flv', 'download url', True)
if url_flv_el is None: video_id_int = int(video_id)
raise ExtractorError('Unable to extract download url') for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
video_url = url_flv_el.text group_int = int(group)
title_el = metadata.find('title') if group_int > video_id_int:
if title_el is None: video_url = video_url.replace(
raise ExtractorError('Unable to extract title') group, compat_str(group_int // video_id_int))
title = title_el.text
format_id_el = metadata.find('format_id')
if format_id_el is None:
format = 'mp4'
else:
format = format_id_el.text
description_el = metadata.find('description')
if description_el is not None:
description = description_el.text
else:
description = None
imagePreview_el = metadata.find('imagePreview')
if imagePreview_el is not None:
thumbnail = imagePreview_el.text
else:
thumbnail = None
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': title, 'title': title,
'format': format, 'thumbnail': xpath_text(metadata, 'imagePreview'),
'thumbnail': thumbnail, 'description': xpath_text(metadata, 'description'),
'description': description, 'duration': parse_duration(xpath_text(metadata, 'duration')),
'series': xpath_text(metadata, 'format'),
'season_number': int_or_none(xpath_text(metadata, 'season')),
'season_id': xpath_text(metadata, 'season_id'),
'episode': title,
'episode_number': int_or_none(xpath_text(metadata, 'episode')),
} }