[theplatform] Use InfoExtractor._parse_smil_formats()

This commit is contained in:
Yen Chi Hsuan 2015-08-19 23:11:25 +08:00
parent a01da8bbf8
commit f877c6ae5a
2 changed files with 20 additions and 60 deletions

View File

@@ -1052,7 +1052,7 @@ def _parse_smil_namespace(self, smil):
return self._search_regex( return self._search_regex(
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base = smil_url base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
b = meta.get('base') or meta.get('httpBase') b = meta.get('base') or meta.get('httpBase')
@@ -1091,6 +1091,12 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
'width': width, 'width': width,
'height': height, 'height': height,
}) })
if transform_rtmp_url:
streamer, src = transform_rtmp_url(streamer, src)
formats[-1].update({
'url': streamer,
'play_path': src,
})
continue continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)

View File

@@ -9,9 +9,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_str,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
@@ -20,7 +17,8 @@
int_or_none, int_or_none,
) )
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformIE(InfoExtractor): class ThePlatformIE(InfoExtractor):
@@ -145,62 +143,18 @@ def _real_extract(self, url):
'url': src, 'url': src,
}] }]
head = meta.find(_x('smil:head')) formats = self._parse_smil_formats(
body = meta.find(_x('smil:body')) meta, smil_url, video_id, namespace=default_ns,
f4m_node = body.find(_x('smil:seq//smil:video'))
if f4m_node is None:
f4m_node = body.find(_x('smil:seq/smil:video'))
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
f4m_url += '?'
# the parameters are from syfy.com, other sites may use others, # the parameters are from syfy.com, other sites may use others,
# they also work for nbc.com # they also work for nbc.com
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
formats = self._extract_f4m_formats(f4m_url, video_id) transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))
else:
formats = [] for _format in formats:
switch = body.find(_x('smil:switch')) ext = determine_ext(_format['url'])
if switch is None:
switch = body.find(_x('smil:par//smil:switch'))
if switch is None:
switch = body.find(_x('smil:par/smil:switch'))
if switch is None:
switch = body.find(_x('smil:par'))
if switch is not None:
base_url = head.find(_x('smil:meta')).attrib['base']
for f in switch.findall(_x('smil:video')):
attr = f.attrib
width = int_or_none(attr.get('width'))
height = int_or_none(attr.get('height'))
vbr = int_or_none(attr.get('system-bitrate'), 1000)
format_id = '%dx%d_%dk' % (width, height, vbr)
formats.append({
'format_id': format_id,
'url': base_url,
'play_path': 'mp4:' + attr['src'],
'ext': 'flv',
'width': width,
'height': height,
'vbr': vbr,
})
else:
switch = body.find(_x('smil:seq//smil:switch'))
if switch is None:
switch = body.find(_x('smil:seq/smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
vbr = int_or_none(attr.get('system-bitrate'), 1000)
ext = determine_ext(attr['src'])
if ext == 'once': if ext == 'once':
ext = 'mp4' _format['ext'] = 'mp4'
formats.append({
'format_id': compat_str(vbr),
'url': attr['src'],
'vbr': vbr,
'ext': ext,
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {