From 305168ca3ee869aed5e95212ed529bf0e6e2f37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 21 Feb 2016 13:55:25 +0600 Subject: [PATCH] [arte:+7] Detect more embeds (Closes #8613) --- youtube_dl/extractor/arte.py | 40 ++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index f4d033e70..a4c786d78 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -110,15 +110,25 @@ def find_iframe_url(webpage, default=NO_DEFAULT): # en and es URLs produce react-based pages with different layout (e.g. # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) if not iframe_url: - embed_html = self._parse_json( - self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program'), - video_id)['embed_html'] - iframe_url = find_iframe_url(embed_html) - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - return self._extract_from_json_url(json_url, video_id, lang) + program = self._search_regex( + r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', + webpage, 'program', default=None) + if program: + embed_html = self._parse_json(program,video_id) + if embed_html: + iframe_url = find_iframe_url(embed_html['embed_html']) + if iframe_url: + json_url = compat_parse_qs( + compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] + if json_url: + return self._extract_from_json_url(json_url, video_id, lang) + # Different kind of embed URL (e.g.
+ # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) + embed_url = self._search_regex( + r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', + webpage, 'embed url', group='url') + return self.url_result(embed_url) + def _extract_from_json_url(self, json_url, video_id, lang): info = self._download_json(json_url, video_id) @@ -294,6 +304,7 @@ class ArteTVMagazineIE(ArteTVPlus7IE): _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' _TESTS = [{ + # Embedded via