[empflix] Add new extractor

2014-05-12 04:10:29 -07:00 · 2014-05-12 04:10:29 -07:00 · 877bea9ce1
parent e399853d0c
commit 877bea9ce1
2 changed files with 47 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -72,6 +72,7 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .elpais import ElPaisIE
 from .empflix import EmpflixIE
 from .engadget import EngadgetIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@ -0,0 +1,46 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
 )
 class EmpflixIE(InfoExtractor):
     """Extractor for video pages on www.empflix.com.

     The video page supplies the title (a ``name="title"`` form field) and
     the URL of a configuration document (``flashvars.config``).  That
     configuration document is fetched as XML and its ``videoLink`` element
     holds the actual media URL.
     """

     # The ``videoid`` group is the page slug: everything between
     # ``/videos/`` and the first ``.`` before ``html``.
     _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<videoid>[^\.]+)\.html'
     _TEST = {
         u'url': u'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
         u'file': u'Amateur-Finger-Fuck-33051.flv',
         u'md5': u'5e5cc160f38ca9857f318eb97146e13e',
         u'info_dict': {
             u"title": u"Amateur Finger Fuck",
             u"age_limit": 18,
         }
     }

     def _real_extract(self, url):
         """Return a one-element list holding the info dict for ``url``.

         The info dict carries the keys ``id``, ``url``, ``title``, ``ext``
         (always ``'flv'``) and ``age_limit``.

         Raises:
             ExtractorError: if the metadata XML has no ``videoLink``
                 element, or that element carries no text.
         """
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')

         # Get webpage content
         webpage = self._download_webpage(url, video_id)
         age_limit = self._rta_search(webpage)

         # Get the video title
         video_title = self._html_search_regex(
             r'name="title" value="(?P<title>[^"]*)"',
             webpage, u'title').strip()

         # The page embeds the location of an XML config document that in
         # turn names the media file.
         cfg_url = self._html_search_regex(
             r'flashvars\.config = escape\("([^"]+)"',
             webpage, u'flashvars.config').strip()
         cfg_xml = self._download_xml(
             cfg_url, video_id, note=u'Downloading metadata')

         # ``find`` returns None when the element is absent; report that as
         # an extraction failure instead of crashing with AttributeError on
         # ``.text`` (or silently passing on a None URL for an empty element).
         video_link = cfg_xml.find('videoLink')
         video_url = video_link.text if video_link is not None else None
         if not video_url:
             raise ExtractorError(
                 u'Unable to find videoLink in metadata for %s' % video_id)

         info = {
             'id': video_id,
             'url': video_url,
             'title': video_title,
             'ext': 'flv',
             'age_limit': age_limit,
         }
         return [info]