[InfoExtractor] Rework and improve JWPlayer extraction
* use traverse_obj() and _search_json()
* support playlist `.load({**video1},{**video2}, ...)`
* support transform_source=... for _extract_jwplayer_data()

pull/32742/head
parent
7216fa2ac4
commit
f66372403f
|
@ -3021,25 +3021,22 @@ class InfoExtractor(object):
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||||
mobj = re.search(
|
return self._search_json(
|
||||||
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
|
r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
|
||||||
webpage)
|
webpage, 'JWPlayer data', video_id,
|
||||||
if mobj:
|
# must be a {...} or sequence, ending
|
||||||
try:
|
contains_pattern=r'\{[\s\S]*}(?(load)(?:\s*,\s*\{[\s\S]*})*)', end_pattern=r'(?(load)\]|\))',
|
||||||
jwplayer_data = self._parse_json(mobj.group('options'),
|
transform_source=transform_source, default=None)
|
||||||
video_id=video_id,
|
|
||||||
transform_source=transform_source)
|
|
||||||
except ExtractorError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
if isinstance(jwplayer_data, dict):
|
|
||||||
return jwplayer_data
|
|
||||||
|
|
||||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||||
jwplayer_data = self._find_jwplayer_data(
|
|
||||||
webpage, video_id, transform_source=js_to_json)
|
# allow passing `transform_source` through to _find_jwplayer_data()
|
||||||
return self._parse_jwplayer_data(
|
transform_source = kwargs.pop('transform_source', None)
|
||||||
jwplayer_data, video_id, *args, **kwargs)
|
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
|
||||||
|
|
||||||
|
jwplayer_data = self._find_jwplayer_data(webpage, video_id, **kwfind)
|
||||||
|
|
||||||
|
return self._parse_jwplayer_data(jwplayer_data, video_id, *args, **kwargs)
|
||||||
|
|
||||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||||
|
@ -3073,22 +3070,14 @@ class InfoExtractor(object):
|
||||||
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
tracks = video_data.get('tracks')
|
for track in traverse_obj(video_data, (
|
||||||
if tracks and isinstance(tracks, list):
|
'tracks', lambda _, t: t.get('kind').lower() in ('captions', 'subtitles'))):
|
||||||
for track in tracks:
|
track_url = urljoin(base_url, track.get('file'))
|
||||||
if not isinstance(track, dict):
|
if not track_url:
|
||||||
continue
|
continue
|
||||||
track_kind = track.get('kind')
|
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||||
if not track_kind or not isinstance(track_kind, compat_str):
|
'url': self._proto_relative_url(track_url)
|
||||||
continue
|
})
|
||||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
|
||||||
continue
|
|
||||||
track_url = urljoin(base_url, track.get('file'))
|
|
||||||
if not track_url:
|
|
||||||
continue
|
|
||||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
|
||||||
'url': self._proto_relative_url(track_url)
|
|
||||||
})
|
|
||||||
|
|
||||||
entry = {
|
entry = {
|
||||||
'id': this_video_id,
|
'id': this_video_id,
|
||||||
|
|
Loading…
Reference in New Issue