From ffa7b2bfee7b94191ffc20ef00c22f708c97cddf Mon Sep 17 00:00:00 2001
From: gfabiano
Date: Mon, 30 Jul 2018 18:15:20 +0200
Subject: [PATCH] [cnbc] Add support for new URL schema (closes #14193)

---
 youtube_dl/extractor/cnbc.py       | 57 +++++++++++++++++++++++++++++-
 youtube_dl/extractor/extractors.py |  5 ++-
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py
index d354d9f95..35c0b6124 100644
--- a/youtube_dl/extractor/cnbc.py
+++ b/youtube_dl/extractor/cnbc.py
@@ -1,8 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+
 from .common import InfoExtractor
-from ..utils import smuggle_url
+from ..utils import (
+    js_to_json,
+    smuggle_url,
+)
 
 
 class CNBCIE(InfoExtractor):
@@ -34,3 +38,54 @@ def _real_extract(self, url):
                 {'force_smil_url': True}),
             'id': video_id,
         }
+
+
+class CNBCNewIE(InfoExtractor):
+    """Extractor for ``cnbc.com/video/...`` pages.
+
+    The page itself is only used to look up the ``content_id`` of the
+    embedded video; the actual extraction is delegated to :class:`CNBCIE`
+    through the ``video.cnbc.com`` gallery URL.
+    """
+
+    IE_NAME = 'CNBC:new'
+    # The ``www.`` prefix is optional; the id is the last path segment
+    # without its extension, query string or fragment.
+    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video[^?#]*/(?P<id>[^/.?#&]+)'
+    _TEST = {
+        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
+        'info_dict': {
+            'id': '7000031301',
+            'ext': 'mp4',
+            'title': 'Trump: I don\'t necessarily agree with raising rates',
+            'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
+            'timestamp': 1531958400,
+            'upload_date': '20180719',
+            'uploader': 'NBCU-CNBC',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    # Gallery URL handled by CNBCIE; ``%s`` is the video's content id.
+    CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s'
+
+    def _real_extract(self, url):
+        """Resolve *url* to a gallery URL result handled by CNBCIE."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # Grab the inline JS object literal that mentions ``content_id``.
+        # The third argument of _search_regex is the human-readable field
+        # name used in error messages, not the video id.
+        video_data = self._search_regex(
+            r'(?s).*<script[^>]*>.*?({.+?content_id.+?}).*?</script>',
+            webpage, 'video data')
+        # js_to_json converts the JS object literal to JSON before parsing.
+        video_id = self._parse_json(
+            video_data, display_id, transform_source=js_to_json)['content_id']
+
+        return self.url_result(
+            self.CNBC_URL_TEMPLATE % video_id, CNBCIE.ie_key())
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f013d13c3..93574907b 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -209,7 +209,10 @@
 from .clubic import ClubicIE
 from .clyp import ClypIE
 from .cmt import CMTIE
-from .cnbc import CNBCIE
+from .cnbc import (
+    CNBCIE,
+    CNBCNewIE,
+)
 from .cnn import (
     CNNIE,
     CNNBlogsIE,