[viewlift] replace SnagFilms extractors

- add support for other sites that use the same logic
- improve format extraction and sorting
This commit is contained in:
remitamine 2016-04-29 11:14:42 +01:00
parent 14638e2915
commit 67167920db
3 changed files with 34 additions and 19 deletions

View File

@ -673,10 +673,6 @@
SmotriUserIE, SmotriUserIE,
SmotriBroadcastIE, SmotriBroadcastIE,
) )
from .snagfilms import (
SnagFilmsIE,
SnagFilmsEmbedIE,
)
from .snotr import SnotrIE from .snotr import SnotrIE
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import ( from .soundcloud import (
@ -879,6 +875,10 @@
) )
from .vidzi import VidziIE from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE from .vier import VierIE, VierVideosIE
from .viewlift import (
ViewLiftIE,
ViewLiftEmbedIE,
)
from .viewster import ViewsterIE from .viewster import ViewsterIE
from .viidea import ViideaIE from .viidea import ViideaIE
from .vimeo import ( from .vimeo import (

View File

@ -51,7 +51,7 @@
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE from .pladform import PladformIE
@ -1924,10 +1924,10 @@ def _playlist_from_matches(matches, getter=None, ie=None):
if onionstudios_url: if onionstudios_url:
return self.url_result(onionstudios_url) return self.url_result(onionstudios_url)
# Look for SnagFilms embeds # Look for ViewLift embeds
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
if snagfilms_url: if viewlift_url:
return self.url_result(snagfilms_url) return self.url_result(viewlift_url)
# Look for JWPlatform embeds # Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage) jwplatform_url = JWPlatformIE._extract_url(webpage)

View File

@ -13,8 +13,12 @@
) )
class SnagFilmsEmbedIE(InfoExtractor): class ViewLiftBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' _DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{ _TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500', 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93', 'md5': '2924e9215c6eff7a55ed35b72276bd93',
@ -40,7 +44,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
webpage) webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
@ -55,6 +59,7 @@ def _real_extract(self, url):
'Film %s is not playable in your area.' % video_id, expected=True) 'Film %s is not playable in your area.' % video_id, expected=True)
formats = [] formats = []
has_bitrate = False
for source in self._parse_json(js_to_json(self._search_regex( for source in self._parse_json(js_to_json(self._search_regex(
r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id): r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
file_ = source.get('file') file_ = source.get('file')
@ -63,22 +68,25 @@ def _real_extract(self, url):
type_ = source.get('type') type_ = source.get('type')
ext = determine_ext(file_) ext = determine_ext(file_)
format_id = source.get('label') or ext format_id = source.get('label') or ext
if all(v == 'm3u8' for v in (type_, ext)): if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', m3u8_id='hls')) file_, video_id, 'mp4', m3u8_id='hls'))
else: else:
bitrate = int_or_none(self._search_regex( bitrate = int_or_none(self._search_regex(
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext], [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
file_, 'bitrate', default=None)) file_, 'bitrate', default=None))
if not has_bitrate and bitrate:
has_bitrate = True
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)) r'^(\d+)[pP]$', format_id, 'height', default=None))
formats.append({ formats.append({
'url': file_, 'url': file_,
'format_id': format_id, 'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
'tbr': bitrate, 'tbr': bitrate,
'height': height, 'height': height,
}) })
self._sort_formats(formats) field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
self._sort_formats(formats, field_preference)
title = self._search_regex( title = self._search_regex(
[r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'], [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
@ -91,8 +99,8 @@ def _real_extract(self, url):
} }
class SnagFilmsIE(InfoExtractor): class ViewLiftIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{ _TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
'md5': '19844f897b35af219773fd63bdec2942', 'md5': '19844f897b35af219773fd63bdec2942',
@ -127,10 +135,16 @@ class SnagFilmsIE(InfoExtractor):
# Film is not available. # Film is not available.
'url': 'http://www.snagfilms.com/show/augie_alone/flirting', 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.winnersview.com/videos/the-good-son',
'only_matching': True,
}, {
'url': 'http://www.kesari.tv/news/video/1461919076414',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) domain, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
@ -170,7 +184,7 @@ def _real_extract(self, url):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id, 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
'id': film_id, 'id': film_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -178,4 +192,5 @@ def _real_extract(self, url):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'categories': categories, 'categories': categories,
'ie_key': 'ViewLiftEmbed',
} }