[NHK] Support alphabetic characters in 7-char NhkVod IDs (#29682)
parent
e988fa4523
commit
c7965b9fc2
|
@ -1,3 +1,4 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
|
|||
def _extract_episode_info(self, url, episode=None):
|
||||
fetch_episode = episode is None
|
||||
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
||||
if episode_id.isdigit():
|
||||
if len(episode_id) == 7:
|
||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||
|
||||
is_video = m_type == 'video'
|
||||
|
@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
|
|||
|
||||
|
||||
class NhkVodIE(NhkBaseIE):
|
||||
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
# The 7-character IDs can contain alphabetic characters too: assume [a-z] rather than just [a-f], e.g. 9999a34
|
||||
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
|
@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
|
|||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video, alphabetic character in ID #29670
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 'qfjay6cg',
|
||||
'ext': 'mp4',
|
||||
'title': 'DESIGN TALKS plus - Fishermen’s Finery',
|
||||
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
|
||||
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
|
||||
'upload_date': '20210615',
|
||||
'timestamp': 1623722008,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
Loading…
Reference in New Issue