[wdr:elefant] Add extractor

This commit is contained in:
Sebastian Leske 2017-10-25 14:59:57 +02:00 committed by Sergey M․
parent df16e645f6
commit 2d8bb80c60
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 52 additions and 3 deletions

View File

@ -1289,6 +1289,7 @@
from .watchindianporn import WatchIndianPornIE from .watchindianporn import WatchIndianPornIE
from .wdr import ( from .wdr import (
WDRIE, WDRIE,
WDRElefantIE,
WDRMobileIE, WDRMobileIE,
) )
from .webcaster import ( from .webcaster import (

View File

@ -16,7 +16,7 @@
class WDRBaseIE(InfoExtractor): class WDRBaseIE(InfoExtractor):
def _extract_wdr_video(self, webpage, display_id): def _extract_jsonp_url(self, webpage, display_id):
# for wdr.de the data-extension is in a tag with the class "mediaLink" # for wdr.de the data-extension is in a tag with the class "mediaLink"
# for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
# for wdrmaus, in a tag with the class "videoButton" (previously a link # for wdrmaus, in a tag with the class "videoButton" (previously a link
@ -35,8 +35,9 @@ def _extract_wdr_video(self, webpage, display_id):
media_link_obj = self._parse_json(json_metadata, display_id, media_link_obj = self._parse_json(json_metadata, display_id,
transform_source=js_to_json) transform_source=js_to_json)
jsonp_url = media_link_obj['mediaObj']['url'] return media_link_obj['mediaObj']['url']
def _extract_wdr_video(self, jsonp_url, display_id):
metadata = self._download_json( metadata = self._download_json(
jsonp_url, display_id, transform_source=strip_jsonp) jsonp_url, display_id, transform_source=strip_jsonp)
@ -206,7 +207,8 @@ def _real_extract(self, url):
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
info_dict = self._extract_wdr_video(webpage, display_id) jsonp_url = self._extract_jsonp_url(webpage, display_id)
info_dict = self._extract_wdr_video(jsonp_url, display_id)
if not info_dict: if not info_dict:
entries = [ entries = [
@ -239,6 +241,52 @@ def _real_extract(self, url):
return info_dict return info_dict
class WDRElefantIE(WDRBaseIE):
    """Extractor for single-video pages of the wdrmaus.de "Elefantenseite".

    A page is addressed by its URL fragment (the part after ``#``). The
    fragment is looked up in the site's table of contents, which yields the
    path of an XML metadata file; that file in turn names the media URL that
    is handed to ``_extract_wdr_video``.
    """

    # The dot in the host name is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<display_id>.+)'
    IE_NAME = 'wdr:elefant'

    _TESTS = [
        {
            'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
            'info_dict': {
                'title': 'Folge Oster-Spezial 2015',
                'id': 'mdb-1088195',
                'ext': 'mp4',
                'age_limit': None,
                'upload_date': '20150406',
            },
            'params': {
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Resolve an Elefantenseite URL to the result of ``_extract_wdr_video``.

        Raises ExtractorError (expected=True) when the URL fragment is not
        listed in the table of contents, or when the page's XML metadata
        does not name a video URL.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')

        # Table of Contents seems to always be at this address, so fetch it directly.
        # The website fetches configurationJS.php5, which links to tableOfContentsJS.php5.
        table_of_contents = self._download_json(
            'https://www.wdrmaus.de/elefantenseite/data/tableOfContentsJS.php5',
            display_id)
        if display_id not in table_of_contents:
            raise ExtractorError(
                'No entry in site\'s table of contents for this URL. '
                'Is the fragment part of the URL (after the #) correct?',
                expected=True)

        xml_metadata_path = table_of_contents[display_id]['xmlPath']
        xml_metadata = self._download_xml(
            'https://www.wdrmaus.de/elefantenseite/' + xml_metadata_path,
            display_id)

        zmdb_url_element = xml_metadata.find('./movie/zmdb_url')
        # An element that exists but carries no text is as useless as a
        # missing one; report both the same way instead of passing None on
        # as a URL.
        if zmdb_url_element is None or not zmdb_url_element.text:
            raise ExtractorError(
                'The URL looks valid, but no video was found. Note that download only works '
                'on pages showing a single video, not on video selection pages.',
                expected=True)

        return self._extract_wdr_video(zmdb_url_element.text, display_id)
class WDRMobileIE(InfoExtractor): class WDRMobileIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://mobile-ondemand\.wdr\.de/ https?://mobile-ondemand\.wdr\.de/