[BR] Add "BR" extractor

Extractor for videos from the Bayerischer Rundfunk Mediathek[1]. Currently only
supports videos. Audio and podcasts do not work yet with this extractor.

1: http://br.de/mediathek
pull/2428/head
David Triendl 2014-02-21 17:28:30 +01:00
parent f7300c5c90
commit 3eb38acb43
2 changed files with 71 additions and 0 deletions

View File

@ -19,6 +19,7 @@ from .bbccouk import BBCCoUkIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .c56 import C56IE

View File

@ -0,0 +1,70 @@
# coding: utf-8
from .common import InfoExtractor
class BRIE(InfoExtractor):
IE_DESC = u"Bayerischer Rundfunk Mediathek"
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-]+\.html)$"
_BASE_URL = u"http://www.br.de"
_TESTS = []
def _real_extract(self, url):
page = self._download_webpage(url, None)
xml_url = self._search_regex(r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
xml = self._download_xml(self._BASE_URL + xml_url, None)
videos = []
for xml_video in xml.findall("video"):
video = {}
video["id"] = xml_video.get("externalId")
video["title"] = xml_video.find("title").text
video["formats"] = self._extract_formats(xml_video.find("assets"))
video["thumbnails"] = self._extract_thumbnails(xml_video.find("teaserImage/variants"))
video["thumbnail"] = video["thumbnails"][0]["url"]
video["description"] = " ".join(xml_video.find("shareTitle").text.splitlines())
video["uploader"] = xml_video.find("author").text
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
video["webpage_url"] = xml_video.find("permalink").text
videos.append(video)
if len(videos) > 1:
self._downloader.report_warning(u'found multiple videos; please'
u'report this with the video URL to http://yt-dl.org/bug')
return videos[0]
def _extract_formats(self, assets):
vformats = []
for asset in assets.findall("asset"):
if asset.find("downloadUrl") is None:
continue
vformat = {}
vformat["url"] = asset.find("downloadUrl").text
vformat["ext"] = asset.find("mediaType").text
vformat["format_id"] = asset.get("type")
vformat["width"] = int(asset.find("frameWidth").text)
vformat["height"] = int(asset.find("frameHeight").text)
vformat["resolution"] = "%ix%i" % (vformat["width"], vformat["height"])
vformat["tbr"] = int(asset.find("bitrateVideo").text)
vformat["abr"] = int(asset.find("bitrateAudio").text)
vformat["vcodec"] = asset.find("codecVideo").text
vformat["container"] = vformat["ext"]
vformat["filesize"] = int(asset.find("size").text)
vformat["preference"] = vformat["quality"] = -1
vformat["format"] = "%s container with %i Kbps %s" % (vformat["container"], vformat["tbr"], vformat["vcodec"])
vformats.append(vformat)
self._sort_formats(vformats)
return vformats
def _extract_thumbnails(self, variants):
thumbnails = []
for variant in variants.findall("variant"):
thumbnail = {}
thumbnail["url"] = self._BASE_URL + variant.find("url").text
thumbnail["width"] = int(variant.find("width").text)
thumbnail["height"] = int(variant.find("height").text)
thumbnail["resolution"] = "%ix%i" % (thumbnail["width"], thumbnail["height"])
thumbnails.append(thumbnail)
thumbnails.sort(key = lambda x: x["width"] * x["height"], reverse=True)
return thumbnails