[atresplayer] improve extraction

- pass hashlib.md5 explicitly as the digestmod argument of hmac.new (since
Python 3.4, relying on MD5 as the implicit default digest is deprecated)
- extract HLS formats
- update tests
- extract errors
This commit is contained in:
remitamine 2015-12-21 16:26:40 +01:00
parent 5c5a3ecf1b
commit 61ebb401b7

View File

@ -2,6 +2,8 @@
import time import time
import hmac import hmac
import hashlib
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -32,6 +34,19 @@ class AtresPlayerIE(InfoExtractor):
'duration': 5527.6, 'duration': 5527.6,
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
'skip': 'This video is only available for registered users'
},
{
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
'md5': '0d0e918533bbd4b263f2de4d197d4aac',
'info_dict': {
'id': 'capitulo-112-david-bustamante',
'ext': 'flv',
'title': 'David Bustamante',
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
'duration': 1439.0,
'thumbnail': 're:^https?://.*\.jpg$',
},
}, },
{ {
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
@ -50,6 +65,13 @@ class AtresPlayerIE(InfoExtractor):
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
_ERRORS = {
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
'DELETED': 'This video has expired and is no longer available for online streaming.',
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
# 'PREMIUM': 'PREMIUM',
}
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -83,23 +105,55 @@ def _real_extract(self, url):
episode_id = self._search_regex( episode_id = self._search_regex(
r'episode="([^"]+)"', webpage, 'episode id') r'episode="([^"]+)"', webpage, 'episode id')
request = sanitized_Request(
self._PLAYER_URL_TEMPLATE % episode_id,
headers={'User-Agent': self._USER_AGENT})
player = self._download_json(request, episode_id, 'Downloading player JSON')
episode_type = player.get('typeOfEpisode')
error_message = self._ERRORS.get(episode_type)
if error_message:
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
formats = []
video_url = player.get('urlVideo')
if video_url:
format_info = {
'url': video_url,
'format_id': 'http',
}
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
if mobj:
format_info.update({
'width': int_or_none(mobj.group('width')),
'height': int_or_none(mobj.group('height')),
'tbr': int_or_none(mobj.group('bitrate')),
})
formats.append(format_info)
m3u8_url = player.get('urlVideoHls')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
timestamp = int_or_none(self._download_webpage( timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL, self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new( token = hmac.new(
self._MAGIC.encode('ascii'), self._MAGIC.encode('ascii'),
(episode_id + timestamp_shifted).encode('utf-8') (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest() ).hexdigest()
formats = []
for fmt in ['windows', 'android_tablet']:
request = sanitized_Request( request = sanitized_Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token)) self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
request.add_header('User-Agent', self._USER_AGENT) headers={'User-Agent': self._USER_AGENT})
fmt_json = self._download_json( fmt_json = self._download_json(
request, video_id, 'Downloading %s video JSON' % fmt) request, video_id, 'Downloading windows video JSON')
result = fmt_json.get('resultDes') result = fmt_json.get('resultDes')
if result.lower() != 'ok': if result.lower() != 'ok':
@ -109,7 +163,6 @@ def _real_extract(self, url):
for format_id, video_url in fmt_json['resultObject'].items(): for format_id, video_url in fmt_json['resultObject'].items():
if format_id == 'token' or not video_url.startswith('http'): if format_id == 'token' or not video_url.startswith('http'):
continue continue
if video_url.endswith('/Manifest'):
if 'geodeswowsmpra3player' in video_url: if 'geodeswowsmpra3player' in video_url:
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
@ -117,24 +170,16 @@ def _real_extract(self, url):
continue continue
else: else:
f4m_url = video_url[:-9] + '/manifest.f4m' f4m_url = video_url[:-9] + '/manifest.f4m'
formats.extend(self._extract_f4m_formats(f4m_url, video_id)) f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
else: if f4m_formats:
formats.append({ formats.extend(f4m_formats)
'url': video_url,
'format_id': 'android-%s' % format_id,
'preference': 1,
})
self._sort_formats(formats) self._sort_formats(formats)
player = self._download_json(
self._PLAYER_URL_TEMPLATE % episode_id,
episode_id)
path_data = player.get('pathData') path_data = player.get('pathData')
episode = self._download_xml( episode = self._download_xml(
self._EPISODE_URL_TEMPLATE % path_data, self._EPISODE_URL_TEMPLATE % path_data, video_id,
video_id, 'Downloading episode XML') 'Downloading episode XML')
duration = float_or_none(xpath_text( duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration')) episode, './media/asset/info/technical/contentDuration', 'duration'))