[tv2hu] improve extraction

This commit is contained in:
Remita Amine 2017-04-12 19:31:18 +01:00
parent 3ef1d0c733
commit e4d74e2778
2 changed files with 30 additions and 46 deletions

View File

@@ -1031,7 +1031,7 @@
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,
) )
from .tv2hu import TV2HUIE from .tv2hu import TV2HuIE
from .tv3 import TV3IE from .tv3 import TV3IE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE from .tv5mondeplus import TV5MondePlusIE

View File

@@ -1,29 +1,22 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none
class TV2HUIE(InfoExtractor):
class TV2HuIE(InfoExtractor):
IE_NAME = 'tv2.hu' IE_NAME = 'tv2.hu'
_VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:musoraink/)?(?P<uploader>[^/]+)/(?:teljes_adasok/)?(?P<id>[0-9]+)_(.+?)\.html' _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:[^/]+/)+(?P<id>\d+)_[^/?#]+?\.html'
_JSON_URL = r'(?P<json_url>https?://.+?\.tv2\.hu/vod/(?P<upload_date>\d+)/id_(?P<upload_id>\d+).+?&type=json)'
_TESTS = [{ _TESTS = [{
'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html', 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html',
'md5': '585e58e2e090f34603804bb2c48e98d8',
'info_dict': { 'info_dict': {
'id': '217679', 'id': '217679',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ezek megőrültek! - 1. adás 1. rész', 'title': 'Ezek megőrültek! - 1. adás 1. rész',
'upload_id': '220289',
'upload_date': '20160826', 'upload_date': '20160826',
'uploader': 'ezek_megorultek',
'thumbnail': 're:^https?://.*\.jpg$' 'thumbnail': 're:^https?://.*\.jpg$'
},
'params': {
# m3u8 download
'skip_download': True,
} }
}, { }, {
'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html', 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html',
@@ -35,44 +28,35 @@ class TV2HUIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(url, video_id)
url, video_id, 'Downloading info page') json_url = self._search_regex(
r'jsonUrl\s*=\s*"([^"]+)"', webpage, 'json url')
json_data = self._download_json(json_url, video_id)
json_url = re.search(self._JSON_URL, webpage) formats = []
for b in ('bitrates', 'backupBitrates'):
json_data = self._download_json( bitrates = json_data.get(b, {})
json_url.group('json_url'), video_id, 'Downloading video info') m3u8_url = bitrates.get('hls')
if m3u8_url:
manifest_url = json_data['bitrates']['hls'] formats.extend(self._extract_wowza_formats(
m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp']))
formats = self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native')
for i in range(len(json_data['bitrates']['mp4'])):
quality = json_data.get('mp4Labels')[i]
if quality.lower() == 'auto':
continue
formats.append({
'protocol': 'http',
'url': json_data['bitrates']['mp4'][i],
'height': int(quality[:-1]),
'width': int(quality[:-1])/9*16,
'ext': 'mp4',
'format_id': quality,
'format_note': 'HTTP',
'preference': int(quality[:-1])
})
for mp4_url in bitrates.get('mp4', []):
height = int_or_none(self._search_regex(
r'\.(\d+)p\.mp4', mp4_url, 'height', default=None))
formats.append({
'format_id': 'http' + ('-%d' % height if height else ''),
'url': mp4_url,
'height': height,
'width': int_or_none(height / 9.0 * 16.0 if height else None),
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage).strip(), 'title': self._og_search_title(webpage).strip(),
'thumbnail': self._og_search_property('image', webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._search_regex(self._VALID_URL, url, 'uploader'), 'upload_date': self._search_regex(
'upload_id': json_url.group('upload_id'), r'/vod/(\d{8})/', json_url, 'upload_date', default=None),
'upload_date': json_url.group('upload_date'), 'formats': formats,
'formats': formats
} }