From a4f3d779db13ec3c8bda67e897de8cd849a7f811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Feb 2015 17:42:12 +0600 Subject: [PATCH] [nbcnews] Simplify --- youtube_dl/extractor/nbc.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3e3de9e2d..52e8595ea 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -52,9 +52,9 @@ def _real_extract(self, url): class NBCNewsIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ - ((video/.+?/(?P<id>\d+))| - ((?P<program>feature|nightly-news)/[^/]+/(?P<title>.+))) + _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ + (?:video/.+?/(?P<id>\d+)| + (?:feature|nightly-news)/[^/]+/(?P<title>.+)) ''' _TESTS = [ @@ -120,17 +120,10 @@ def _real_extract(self, url): # "feature" and "nightly-news" pages use theplatform.com title = mobj.group('title') webpage = self._download_webpage(url, title) - program = mobj.group('program') - if program == 'feature': - bootstrap_json = self._search_regex( - r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', - flags=re.MULTILINE) - else: - # nightly-news - bootstrap_json = self._search_regex( - r'var playlistData = ({.+});\s*$', webpage, 'playlist data', - flags=re.MULTILINE) - bootstrap = json.loads(bootstrap_json) + bootstrap_json = self._search_regex( + r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', + webpage, 'bootstrap json', flags=re.MULTILINE) + bootstrap = self._parse_json(bootstrap_json, video_id) info = bootstrap['results'][0]['video'] mpxid = info['mpxId']