From ea1f5e5dbd6c58d4f0872a65b97611732f4b29bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 May 2018 07:21:24 +0700 Subject: [PATCH] [itv:btcc] Add extractor (closes #16139) --- youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/itv.py | 37 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9fe3f649d..316c8199d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -477,7 +477,10 @@ from .iprima import IPrimaIE from .iqiyi import IqiyiIE from .ir90tv import Ir90TvIE -from .itv import ITVIE +from .itv import ( + ITVIE, + ITVBTCCIE, +) from .ivi import ( IviIE, IviCompilationIE diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 457b424a2..6a4f8a505 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -7,6 +7,7 @@ import re from .common import InfoExtractor +from .brightcove import BrightcoveNewIE from ..compat import ( compat_str, compat_etree_register_namespace, @@ -18,6 +19,7 @@ xpath_text, int_or_none, parse_duration, + smuggle_url, ExtractorError, determine_ext, ) @@ -260,3 +262,38 @@ def extract_subtitle(sub_url): 'subtitles': subtitles, }) return info + + +class ITVBTCCIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P[^/?#&]+)' + _TEST = { + 'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch', + 'info_dict': { + 'id': 'btcc-2018-all-the-action-from-brands-hatch', + 'title': 'BTCC 2018: All the action from Brands Hatch', + }, + 'playlist_mincount': 9, + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s' + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result( + smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, { + # ITV does not like some GB IP ranges, so here are some + # IP blocks it accepts + 'geo_ip_blocks': [ + '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21' + ], + 'referrer': url, + }), + ie=BrightcoveNewIE.ie_key(), video_id=video_id) + for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)] + + title = self._og_search_title(webpage, fatal=False) + + return self.playlist_result(entries, playlist_id, title)