From ccdd0ffb805fde3bbf66d80fe765ff4e75377044 Mon Sep 17 00:00:00 2001 From: Alessandro Ghedini Date: Sat, 1 Nov 2014 12:04:15 +0100 Subject: [PATCH 1/6] [generic] indicate when a direct video has been extracted Fixes #4052. --- youtube_dl/extractor/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3882e859c2..8abc340b43 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -559,6 +559,7 @@ def _real_extract(self, url): return { 'id': video_id, 'title': os.path.splitext(url_basename(url))[0], + 'direct': True, 'formats': [{ 'format_id': m.group('format_id'), 'url': url, From 5d63b0aa9301f4e460dc5f3b08f723cbf2091e5a Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 6 Nov 2014 01:19:20 +0200 Subject: [PATCH 2/6] [goshgay] Fix title extraction and modernize Also remove width and height as they are not of the actual video. --- youtube_dl/extractor/goshgay.py | 43 +++++++++++---------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 7bca21ad0f..18474cbb72 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -1,15 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( compat_urlparse, - str_to_int, ExtractorError, ) -import json class GoshgayIE(InfoExtractor): @@ -27,36 +23,27 @@ class GoshgayIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title') + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + family_friendly = self._html_search_meta( + 'isFamilyFriendly', webpage, default='false') + config_url = self._search_regex( + r"'config'\s*:\s*'([^']+)'", webpage, 'config URL') - player_config = self._search_regex( - r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') - player_vars = json.loads(player_config.replace("'", '"')) - width = str_to_int(player_vars.get('width')) - height = str_to_int(player_vars.get('height')) - config_uri = player_vars.get('config') + config = self._download_xml( + config_url, video_id, 'Downloading player config XML') - if config_uri is None: - raise ExtractorError('Missing config URI') - node = self._download_xml(config_uri, video_id, 'Downloading player config XML', - errnote='Unable to download XML') - if node is None: + if config is None: raise ExtractorError('Missing config XML') - if node.tag != 'config': + if config.tag != 'config': raise ExtractorError('Missing config attribute') - fns = node.findall('file') - imgs = node.findall('image') - if len(fns) != 1: + fns = config.findall('file') + if len(fns) < 1: raise ExtractorError('Missing media URI') video_url = fns[0].text - if len(imgs) < 1: - thumbnail = None - else: - thumbnail = imgs[0].text url_comp = compat_urlparse.urlparse(url) ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) @@ -65,9 +52,7 @@ def _real_extract(self, url): 'id': video_id, 'url': video_url, 'title': title, - 'width': width, - 'height': height, 'thumbnail': thumbnail, 'http_referer': ref, - 'age_limit': 18, + 'age_limit': 0 if family_friendly == 'true' else 18, } From 1fe8fb8c2022b70e6ed44d9c80570239eec74728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 6 Nov 2014 21:44:07 +0100 Subject: [PATCH 3/6] [vice] Re-add extractor (fixes #4120) The generic extraction no longer works. 
--- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vice.py | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 youtube_dl/extractor/vice.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3f85c99cdd..3c1807f158 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -421,6 +421,7 @@ from .vevo import VevoIE from .vgtv import VGTVIE from .vh1 import VH1IE +from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py new file mode 100644 index 0000000000..f11ca8217f --- /dev/null +++ b/youtube_dl/extractor/vice.py @@ -0,0 +1,38 @@ +from __future__ import unicode_literals +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE +from ..utils import ExtractorError + + +class ViceIE(InfoExtractor): + _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)' + + _TEST = { + 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1', + 'info_dict': { + 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', + 'ext': 'mp4', + 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', + }, + 'params': { + # Requires ffmpeg (m3u8 manifest) + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + try: + embed_code = self._search_regex( + r'embedCode=([^&\'"]+)', webpage, + 'ooyala embed code') + ooyala_url = OoyalaIE._url_for_embed_code(embed_code) + print(ooyala_url) + except ExtractorError: + raise ExtractorError('The page doesn\'t contain a video', expected=True) + return self.url_result(ooyala_url, ie='Ooyala') + From 29ed169cd63b3665a8fc4140eb698689bf656361 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Fri, 7 Nov 2014 22:53:54 +0200 Subject: [PATCH 4/6] [wrzuta] Add mp3 
as a possible format (Closes #4126) --- youtube_dl/extractor/wrzuta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index 34dd6d9528..41756784a3 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -49,7 +49,7 @@ def _real_extract(self, url): quality = qualities(['SD', 'MQ', 'HQ', 'HD']) - audio_table = {'flv': 'mp3', 'webm': 'ogg'} + audio_table = {'flv': 'mp3', 'webm': 'ogg', 'mp3': 'mp3'} embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id) From 2fdbf27ad8f241c8f23b4a37683243a06903546d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 8 Nov 2014 14:53:23 +0100 Subject: [PATCH 5/6] [niconico:playlist] Use the same video url the webpage uses (closes #4133) --- youtube_dl/extractor/niconico.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 45cbd4ee97..3b5784e8f5 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -175,7 +175,8 @@ def _real_extract(self, url): entries = [{ '_type': 'url', 'ie_key': NiconicoIE.ie_key(), - 'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'], + 'url': ('http://www.nicovideo.jp/watch/%s' % + entry['item_data']['video_id']), } for entry in entries] return { From c2b61af54827373780415edce92b971b43ceead1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 8 Nov 2014 15:09:04 +0100 Subject: [PATCH 6/6] [options] Document the syntax for merging formats (closes #3940, closes #4132) --- youtube_dl/options.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 997e92ad77..cdcf2f62cd 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -261,7 +261,16 @@ def 
_hide_login_info(opts): video_format.add_option( '-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, - help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio') + help='video format code, specify the order of preference using' + ' slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also' + ' supported. You can also use the special names "best",' + ' "bestvideo", "bestaudio", "worst", "worstvideo" and' + ' "worstaudio". By default, youtube-dl will pick the best quality.' + ' Use commas to download multiple audio formats, such as' + ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.' + ' You can merge the video and audio of two formats into a single' + ' file using -f <video-format>+<audio-format> (requires ffmpeg or' + ' avconv), for example -f bestvideo+bestaudio.') video_format.add_option( '--all-formats', action='store_const', dest='format', const='all',