diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 955119d40..7377ac7b9 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -7,7 +7,9 @@ from ..utils import ( int_or_none, unescapeHTML, find_xpath_attr, + smuggle_url, ) +from .senateisvp import SenateISVPIE class CSpanIE(InfoExtractor): @@ -40,6 +42,15 @@ class CSpanIE(InfoExtractor): 'title': 'General Motors Ignition Switch Recall', }, 'playlist_duration_sum': 14855, + }, { + # Video from senate.gov + 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', + 'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', + 'info_dict': { + 'id': 'judiciary031715', + 'ext': 'flv', + 'title': 'Immigration Reforms Needed to Protect Skilled American Workers', + } }] def _real_extract(self, url): @@ -56,7 +67,7 @@ class CSpanIE(InfoExtractor): # present, otherwise this is a stripped version r'
(.*?)
' ], - webpage, 'description', flags=re.DOTALL) + webpage, 'description', flags=re.DOTALL, default=None) info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id data = self._download_json(info_url, video_id) @@ -68,6 +79,11 @@ class CSpanIE(InfoExtractor): title = find_xpath_attr(doc, './/string', 'name', 'title').text thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text + senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) + if senate_isvp_url: + surl = smuggle_url(senate_isvp_url, {'force_title': title}) + return self.url_result(surl, 'SenateISVP', video_id, title) + files = data['video']['files'] entries = [{ diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e645d1bb3..ec4d0c210 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -35,6 +35,7 @@ from .rutv import RUTVIE from .smotri import SmotriIE from .condenast import CondeNastIE from .udn import UDNEmbedIE +from .senateisvp import SenateISVPIE class GenericIE(InfoExtractor): @@ -1365,6 +1366,11 @@ class GenericIE(InfoExtractor): return self.url_result( compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') + # Look for Senate ISVP iframe + senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) + if senate_isvp_url: + return self.url_result(senate_isvp_url, 'SenateISVP') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index a93874cad..23e1cd944 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + unsmuggle_url, +) from ..compat import ( compat_parse_qs, compat_urlparse, @@ -73,12 +76,22 @@ class SenateISVPIE(InfoExtractor): } }] + @staticmethod + 
def _search_iframe_url(webpage): + mobj = re.search( + r"