diff --git a/README.md b/README.md
index 560bcdca18..2776cb3eb0 100644
--- a/README.md
+++ b/README.md
@@ -113,25 +113,28 @@ ## Verbosity / Simulation Options:
## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of
- preference using slashes: "-f 22/17/18"
+ preference using slashes: "-f 22/17/18". "-f mp4"
+ and "-f flv" are also supported
--all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific one
is requested
--max-quality FORMAT highest quality format to download
-F, --list-formats list all available formats (currently youtube
only)
+
+## Subtitle Options:
--write-sub write subtitle file (currently youtube only)
--write-auto-sub write automatic subtitle file (currently youtube
only)
--only-sub [deprecated] alias of --skip-download
--all-subs downloads all the available subtitles of the
- video (currently youtube only)
+ video
--list-subs lists all available subtitles for the video
- (currently youtube only)
- --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
- (currently youtube only)
- --sub-lang LANG language of the subtitles to download (optional)
- use IETF language tags like 'en'
+ --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] youtube
+ only)
+ --sub-lang LANGS languages of the subtitles to download (optional)
+ separated by commas, use IETF language tags like
+ 'en,pt'
## Authentication Options:
-u, --username USERNAME account username
@@ -153,6 +156,8 @@ ## Post-processing Options:
processing; the video is erased by default
--no-post-overwrites do not overwrite post-processed files; the post-
processed files are overwritten by default
+ --embed-subs embed subtitles in the video (only for mp4
+ videos)
# CONFIGURATION
diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
index 3b99a96145..bd10f63c2f 100644
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -4,8 +4,12 @@ __youtube-dl()
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
opts="{{flags}}"
+ keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
- if [[ ${cur} == * ]] ; then
+ if [[ ${cur} =~ : ]]; then
+ COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+ return 0
+ elif [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
return 0
fi
diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py
index 6af8bb9d84..03733c35d2 100755
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@@ -6,28 +6,35 @@
import urllib.request
if len(sys.argv) <= 1:
- print('Specify the version number as parameter')
- sys.exit()
+ print('Specify the version number as parameter')
+ sys.exit()
version = sys.argv[1]
with open('update/LATEST_VERSION', 'w') as f:
- f.write(version)
+ f.write(version)
versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info:
- del versions_info['signature']
+ del versions_info['signature']
new_version = {}
-filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version}
+filenames = {
+ 'bin': 'youtube-dl',
+ 'exe': 'youtube-dl.exe',
+ 'tar': 'youtube-dl-%s.tar.gz' % version}
+build_dir = os.path.join('..', '..', 'build', version)
for key, filename in filenames.items():
- print('Downloading and checksumming %s...' %filename)
- url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
- data = urllib.request.urlopen(url).read()
- sha256sum = hashlib.sha256(data).hexdigest()
- new_version[key] = (url, sha256sum)
+    # The files are now checksummed from the local build directory, but the
+    # entry written to versions.json still records the public download URL,
+    # so it has to be computed here (it used to come from the download step).
+    url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
+    fn = os.path.join(build_dir, filename)
+    with open(fn, 'rb') as f:
+        data = f.read()
+    # An empty artifact means the build step failed; refuse to publish it.
+    if not data:
+        raise ValueError('File %s is empty!' % fn)
+    sha256sum = hashlib.sha256(data).hexdigest()
+    new_version[key] = (url, sha256sum)
versions_info['versions'][version] = new_version
versions_info['latest'] = version
-json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
\ No newline at end of file
+with open('update/versions.json', 'w') as jsonf:
+ json.dump(versions_info, jsonf, indent=4, sort_keys=True)
diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py
index cfff05fc8f..16571a924c 100755
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@@ -22,7 +22,7 @@
@@ -54,4 +54,3 @@
with open('update/releases.atom','w',encoding='utf-8') as atom_file:
atom_file.write(atom_template)
-
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
new file mode 100755
index 0000000000..fa4bb2beb1
--- /dev/null
+++ b/devscripts/gh-pages/update-sites.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import textwrap
+
+# We must be able to import youtube_dl
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+import youtube_dl
+
+def main():
+    """Render supportedsites.html from the supportedsites.html.in template.
+
+    One entry is built per extractor returned by youtube_dl.gen_extractors(),
+    sorted by IE_NAME, and the joined entries replace the @SITES@ placeholder
+    in the template. Both files are opened relative to the current directory.
+    """
+    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
+        template = tmplf.read()
+
+    ie_htmls = []
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+        ie_html = '{} '.format(ie.IE_NAME)
+        try:
+            ie_html += ': {}'.format(ie.IE_DESC)
+        except AttributeError:
+            # IE_DESC is optional; extractors without it get the name only.
+            pass
+        if ie.working() == False:
+            ie_html += ' (Currently broken)'
+        ie_htmls.append('{} '.format(ie_html))
+
+    # Every entry line is indented with one tab before being substituted.
+    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
+
+    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
+        sitesf.write(template)
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 46c31e4375..62c68a6cf4 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe
-/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
+/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
@@ -85,6 +85,7 @@ ROOT=$(pwd)
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
"$ROOT/devscripts/gh-pages/generate-download.py"
"$ROOT/devscripts/gh-pages/update-copyright.py"
+ "$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index 663ccc422b..3d1f83a937 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -14,21 +14,21 @@
# 89
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
- # 88
+ # 88 - vflapUV9V 2013/08/28
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
- "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+ "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
- # 86
+ # 86 - vfluy6kdb 2013/09/06
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
- "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
+ "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
# 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
- # 84
+ # 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
- "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
+ ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index c54faa380e..5d8d93e0e9 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -11,24 +11,49 @@
from helper import get_testcases
class TestAllURLsMatching(unittest.TestCase):
+    def setUp(self):
+        """Build the extractor instances that the matching helpers iterate over."""
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        """Return the IE_NAME of every extractor in self.ies that accepts url.
+
+        The catch-all 'generic' extractor is left out of the result.
+        """
+        names = []
+        for ie in self.ies:
+            if not ie.suitable(url):
+                continue
+            if ie.IE_NAME != 'generic':
+                names.append(ie.IE_NAME)
+        return names
+
+    def assertMatch(self, url, ie_list):
+        """Assert that the non-generic extractors matching url are exactly ie_list."""
+        matched = self.matching_ies(url)
+        self.assertEqual(matched, ie_list)
+
def test_youtube_playlist_matching(self):
- self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
- self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
- self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
- self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
- self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
- self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
- self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
- self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
+ assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+ assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+ assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+ assertPlaylist(u'PL63F0C78739B09958')
+ assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+ assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+ assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+ assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+ self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+ self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+ self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
def test_youtube_channel_matching(self):
- self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
- self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
- self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
+ assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+ assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
+
+ def test_youtube_user_matching(self):
+ self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
+ def test_youtube_feeds(self):
+ self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+ self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
+ self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
+ self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
+
+ def test_youtube_show_matching(self):
+ self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
@@ -63,15 +88,12 @@ def test_no_duplicates(self):
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
def test_keywords(self):
- ies = gen_extractors()
- matching_ies = lambda url: [ie.IE_NAME for ie in ies
- if ie.suitable(url) and ie.IE_NAME != 'generic']
- self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
- self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
- self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
- self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
- self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
- self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
+ self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+ self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+ self.assertMatch(':thedailyshow', ['ComedyCentral'])
+ self.assertMatch(':tds', ['ComedyCentral'])
+ self.assertMatch(':colbertreport', ['ComedyCentral'])
+ self.assertMatch(':cr', ['ComedyCentral'])
if __name__ == '__main__':
diff --git a/test/test_download.py b/test/test_download.py
index 21cb2e6941..23a66254d8 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -127,12 +127,11 @@ def _hook(status):
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
- self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
+ got = 'md5:' + md5(info_dict.get(info_field))
else:
got = info_dict.get(info_field)
- self.assertEqual(
- expected, got,
- u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+ self.assertEqual(expected, got,
+ u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# If checkable fields are missing from the test case, print the info_dict
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 217c4a52f5..7c5ac4bc2e 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -63,6 +63,17 @@ def format_bytes(bytes):
converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
+    @staticmethod
+    def format_seconds(seconds):
+        """Format a duration given in seconds as 'MM:SS' or 'HH:MM:SS'.
+
+        Returns '--:--:--' when the duration exceeds 99 hours, 'MM:SS' when
+        it is shorter than one hour and 'HH:MM:SS' otherwise.
+        """
+        (mins, secs) = divmod(seconds, 60)
+        # Rebind mins to the remainder so that the HH:MM:SS form shows the
+        # minutes within the hour rather than the total number of minutes.
+        (hours, mins) = divmod(mins, 60)
+        if hours > 99:
+            return '--:--:--'
+        if hours == 0:
+            return '%02d:%02d' % (mins, secs)
+        else:
+            return '%02d:%02d:%02d' % (hours, mins, secs)
+
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
@@ -78,14 +89,7 @@ def calc_eta(start, now, total, current):
return '--:--'
rate = float(current) / dif
eta = int((float(total) - float(current)) / rate)
- (eta_mins, eta_secs) = divmod(eta, 60)
- (eta_hours, eta_mins) = divmod(eta_mins, 60)
- if eta_hours > 99:
- return '--:--:--'
- if eta_hours == 0:
- return '%02d:%02d' % (eta_mins, eta_secs)
- else:
- return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
+ return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
@@ -234,12 +238,14 @@ def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
- def report_finish(self):
+ def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
- self.to_screen(u'')
+ clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+ self.to_screen(u'\r%s[download] 100%% of %s in %s' %
+ (clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
self.report_destination(filename)
@@ -542,7 +548,7 @@ def _do_download(self, filename, info_dict):
self.report_error(u'Did not get any data blocks')
return False
stream.close()
- self.report_finish()
+ self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index 336a425591..ae56d2082d 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -137,7 +137,7 @@ def run_ffmpeg(self, path, out_path, codec, more_opts):
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
- raise AudioConversionError(err.message)
+ raise AudioConversionError(err.msg)
def run(self, information):
path = information['filepath']
@@ -207,7 +207,7 @@ def run(self, information):
except:
etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError):
- msg = u'audio conversion failed: ' + e.message
+ msg = u'audio conversion failed: ' + e.msg
else:
msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
raise PostProcessingError(msg)
@@ -458,6 +458,7 @@ def run(self, information):
opts.extend(['-f', 'mp4'])
temp_filename = filename + u'.temp'
+ self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index e9f29e6808..b289bd9e26 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -76,7 +76,7 @@ class YoutubeDL(object):
allsubtitles: Downloads all the subtitles of the video
listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
- subtitleslangs: Language of the subtitles to download
+ subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
@@ -97,6 +97,7 @@ class YoutubeDL(object):
def __init__(self, params):
"""Create a FileDownloader object with the given options."""
self._ies = []
+ self._ies_instances = {}
self._pps = []
self._progress_hooks = []
self._download_retcode = 0
@@ -111,8 +112,21 @@ def __init__(self, params):
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
+ self._ies_instances[ie.ie_key()] = ie
ie.set_downloader(self)
+    def get_info_extractor(self, ie_key):
+        """
+        Return the IE instance registered under ie_key.
+
+        An instance that is already registered is reused; when there is none,
+        a new one is instantiated and registered through add_info_extractor
+        before it is returned.
+        """
+        known = self._ies_instances.get(ie_key)
+        if known is not None:
+            return known
+        created = get_info_extractor(ie_key)()
+        self.add_info_extractor(created)
+        return created
+
def add_default_info_extractors(self):
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
@@ -294,9 +308,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}):
'''
if ie_key:
- ie = get_info_extractor(ie_key)()
- ie.set_downloader(self)
- ies = [ie]
+ ies = [self.get_info_extractor(ie_key)]
else:
ies = self._ies
@@ -448,7 +460,8 @@ def process_info(self, info_dict):
if self.params.get('forceid', False):
compat_print(info_dict['id'])
if self.params.get('forceurl', False):
- compat_print(info_dict['url'])
+ # For RTMP URLs, also include the playpath
+ compat_print(info_dict['url'] + info_dict.get('play_path', u''))
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
compat_print(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and 'description' in info_dict:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 2c2fd441cf..0083f2e99c 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -28,6 +28,7 @@
'Axel Noack',
'Albert Kim',
'Pierre Rudloff',
+ 'Huarong Huo',
)
__license__ = 'Public Domain'
@@ -45,6 +46,7 @@
import warnings
import platform
+
from .utils import *
from .update import update_self
from .version import __version__
@@ -99,6 +101,16 @@ def _find_term_columns():
pass
return None
+    def _hide_login_info(opts):
+        """Return a copy of opts with the values of login options blanked.
+
+        Every occurrence of -p/--password/-u/--username has the argument that
+        follows it replaced by an empty string, and the '--password=VALUE' /
+        '--username=VALUE' spellings have VALUE removed. The input list is
+        not modified. An option that is the last element (no value follows
+        it) is left as is instead of raising IndexError.
+        """
+        private_opts = ('-p', '--password', '-u', '--username')
+        private_prefixes = ('--password=', '--username=')
+        opts = list(opts)
+        for i, opt in enumerate(opts):
+            if opt in private_opts:
+                # Blank the value that follows, if there is one.
+                if i + 1 < len(opts):
+                    opts[i + 1] = ''
+            elif opt.startswith(private_prefixes):
+                # Keep the option name so the debug output stays readable.
+                opts[i] = opt.split('=', 1)[0] + '='
+        return opts
+
max_width = 80
max_help_position = 80
@@ -181,7 +193,7 @@ def _find_term_columns():
video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT',
- help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
+ help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats',
@@ -354,9 +366,9 @@ def _find_term_columns():
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
if opts.verbose:
- sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
- sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
- sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
+ sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+ sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+ sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
return parser, opts, args
@@ -427,6 +439,10 @@ def _real_main(argv=None):
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
https_handler = make_HTTPS_handler(opts)
opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+ opener.addheaders =[]
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
@@ -604,7 +620,7 @@ def _real_main(argv=None):
sys.exc_clear()
except:
pass
- sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+ sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
ydl.add_default_info_extractors()
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
new file mode 100644
index 0000000000..9a0c93fa6f
--- /dev/null
+++ b/youtube_dl/aes.py
@@ -0,0 +1,202 @@
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+
+import base64
+from math import ceil
+
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+def aes_ctr_decrypt(data, key, counter):
+    """
+    Decrypt data with AES in counter mode
+
+    @param {int[]} data        cipher
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
+                               returns the next counter block
+    @returns {int[]}           decrypted data
+    """
+    round_keys = key_expansion(key)
+    total_len = len(data)
+
+    plain = []
+    for start in range(0, total_len, BLOCK_SIZE_BYTES):
+        keystream_input = counter.next_value()
+        chunk = data[start : start + BLOCK_SIZE_BYTES]
+        # Zero-pad a short final block so it can be XORed with a full block
+        chunk += [0] * (BLOCK_SIZE_BYTES - len(chunk))
+        plain += xor(chunk, aes_encrypt(keystream_input, round_keys))
+
+    # Drop the bytes that only exist because of the padding
+    return plain[:total_len]
+
+def key_expansion(data):
+    """
+    Generate key schedule
+
+    @param {int[]} data  16/24/32-Byte cipher key
+    @returns {int[]}     176/208/240-Byte expanded key
+    """
+    data = data[:] # copy
+    rcon_iteration = 1
+    key_size_bytes = len(data)
+    # 11/13/15 round keys of one block each for 16/24/32-Byte keys
+    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+    # Each pass appends key_size_bytes new bytes, 4 bytes (one word) at a time.
+    # Every new word is the previous word, possibly transformed, XORed with
+    # the word located key_size_bytes before the end of the data.
+    while len(data) < expanded_key_size_bytes:
+        # First word of the pass: previous word run through the schedule core
+        temp = data[-4:]
+        temp = key_schedule_core(temp, rcon_iteration)
+        rcon_iteration += 1
+        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+
+        # Next three words: plain XOR chaining
+        for _ in range(3):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+
+        # 32-Byte keys only: the fifth word gets an extra S-box substitution
+        if key_size_bytes == 32:
+            temp = data[-4:]
+            temp = sub_bytes(temp)
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+
+        # Remaining words of the pass: 3 for 32-Byte, 2 for 24-Byte, none for 16-Byte keys
+        for _ in range(3 if key_size_bytes == 32  else 2 if key_size_bytes == 24 else 0):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+    # The last pass may overshoot; cut back to the exact expanded size
+    data = data[:expanded_key_size_bytes]
+
+    return data
+
+def aes_encrypt(data, expanded_key):
+    """
+    Encrypt a single block with AES
+
+    @param {int[]} data          16-Byte state
+    @param {int[]} expanded_key  176/208/240-Byte expanded key
+    @returns {int[]}             16-Byte cipher
+    """
+    last_round = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+    # Initial round key addition
+    state = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+    for round_no in range(1, last_round + 1):
+        state = shift_rows(sub_bytes(state))
+        # The final round has no column mixing
+        if round_no < last_round:
+            state = mix_columns(state)
+        offset = round_no * BLOCK_SIZE_BYTES
+        state = xor(state, expanded_key[offset : offset + BLOCK_SIZE_BYTES])
+
+    return state
+
+def aes_decrypt_text(data, password, key_size_bytes):
+    """
+    Decrypt text
+    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
+    - Mode of operation is 'counter'
+
+    @param {str} data                    Base64 encoded string
+    @param {str,unicode} password        Password (will be encoded with utf-8)
+    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+    @returns {str}                       Decrypted data
+    """
+    NONCE_LENGTH_BYTES = 8
+
+    data = bytes_to_intlist(base64.b64decode(data))
+    password = bytes_to_intlist(password.encode('utf-8'))
+
+    # Truncate or zero-pad the password to exactly key_size_bytes
+    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
+    # Encrypt the first block of that key with the key itself; the 16-Byte
+    # result is repeated key_size_bytes // BLOCK_SIZE_BYTES times.
+    # NOTE(review): for key_size_bytes == 24 the integer division gives 1, so
+    # the resulting key is 16 Bytes long -- confirm this is intended.
+    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+
+    nonce = data[:NONCE_LENGTH_BYTES]
+    cipher = data[NONCE_LENGTH_BYTES:]
+
+    class Counter:
+        # Initial counter block: the nonce followed by zero bytes up to the
+        # block size. Defined on the class; the first call to next_value
+        # shadows it with an instance attribute.
+        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+        def next_value(self):
+            # Hand out the current block, then store the incremented one
+            temp = self.__value
+            self.__value = inc(self.__value)
+            return temp
+
+    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+    plaintext = intlist_to_bytes(decrypted_data)
+
+    return plaintext
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+MIX_COLUMN_MATRIX = ((2,3,1,1),
+ (1,2,3,1),
+ (1,1,2,3),
+ (3,1,1,2))
+
+def sub_bytes(data):
+    """Return a new list with every byte of data replaced by its SBOX entry."""
+    substituted = []
+    for value in data:
+        substituted.append(SBOX[value])
+    return substituted
+
+def rotate(data):
+    """Return a copy of the list data rotated left by one position."""
+    tail = data[1:]
+    head = [data[0]]
+    return tail + head
+
+def key_schedule_core(data, rcon_iteration):
+    """Rotate the word left by one byte, substitute it through the S-box and
+    XOR RCON[rcon_iteration] into its first byte. Returns a new list."""
+    word = sub_bytes(rotate(data))
+    word[0] ^= RCON[rcon_iteration]
+
+    return word
+
+def xor(data1, data2):
+    """Return the element-wise XOR of two int sequences.
+
+    The result is as long as the shorter of the two inputs.
+    """
+    result = []
+    for left, right in zip(data1, data2):
+        result.append(left ^ right)
+    return result
+
+def mix_column(data):
+    """Multiply one 4-byte column by MIX_COLUMN_MATRIX and return the
+    resulting 4-byte column as a new list."""
+    data_mixed = []
+    for row in range(4):
+        mixed = 0
+        for column in range(4):
+            # A matrix entry of 1 leaves the byte unchanged
+            addend = data[column]
+            if MIX_COLUMN_MATRIX[row][column] in (2,3):
+                # Times 2: shift left by one bit; when the result no longer
+                # fits into a byte, mask it to 8 bits and XOR with 0x1b
+                addend <<= 1
+                if addend > 0xff:
+                    addend &= 0xff
+                    addend ^= 0x1b
+                # Times 3 is (times 2) XOR the original byte
+                if MIX_COLUMN_MATRIX[row][column] == 3:
+                    addend ^= data[column]
+            # The products of one row are combined with XOR
+            mixed ^= addend & 0xff
+        data_mixed.append(mixed)
+    return data_mixed
+
+def mix_columns(data):
+    """Apply mix_column to each of the four consecutive 4-byte columns of the
+    16-byte state and return the concatenated result."""
+    mixed_state = []
+    for start in (0, 4, 8, 12):
+        mixed_state.extend(mix_column(data[start : start + 4]))
+    return mixed_state
+
+def shift_rows(data):
+    """Return the 16-byte state with row r rotated left by r columns.
+
+    The state is stored column by column, so the byte of row r in column c
+    sits at index c * 4 + r.
+    """
+    return [data[((column + row) & 0b11) * 4 + row]
+            for column in range(4)
+            for row in range(4)]
+
+def inc(data):
+    """Return a copy of data incremented by one, treating the list as a
+    big-endian number made of bytes; an all-255 list wraps to all zeros."""
+    counter = data[:]
+    pos = len(counter) - 1
+    while pos >= 0:
+        if counter[pos] != 255:
+            counter[pos] += 1
+            break
+        # Byte overflows: reset it and carry into the next higher byte
+        counter[pos] = 0
+        pos -= 1
+    return counter
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b4db8f0bf7..fbe0b8cb7a 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,3 +1,5 @@
+from .appletrailers import AppleTrailersIE
+from .addanime import AddAnimeIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
from .arte import ArteTvIE
@@ -6,16 +8,21 @@
from .bliptv import BlipTVIE, BlipTVUserIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
+from .c56 import C56IE
from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
+from .defense import DefenseGouvFrIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .escapist import EscapistIE
@@ -29,6 +36,7 @@
from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
+from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
@@ -44,23 +52,30 @@
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
+from .naver import NaverIE
from .nba import NBAIE
+from .nbc import NBCNewsIE
from .ooyala import OoyalaIE
+from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
+from .ro220 import Ro220IE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .sina import SinaIE
from .slashdot import SlashdotIE
+from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
@@ -71,18 +86,19 @@
from .tf1 import TF1IE
from .thisav import ThisAVIE
from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
-from .ustream import UstreamIE
from .unistra import UnistraIE
+from .ustream import UstreamIE
from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
-from .c56 import C56IE
from .wat import WatIE
from .weibo import WeiboIE
from .wimp import WimpIE
@@ -116,12 +132,14 @@
]
_ALL_CLASSES.append(GenericIE)
+
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
"""
return [klass() for klass in _ALL_CLASSES]
+
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
return globals()[ie_name+'IE']
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
new file mode 100644
index 0000000000..82a785a19c
--- /dev/null
+++ b/youtube_dl/extractor/addanime.py
@@ -0,0 +1,75 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_HTTPError,
+ compat_str,
+ compat_urllib_parse,
+ compat_urllib_parse_urlparse,
+
+ ExtractorError,
+)
+
+
+class AddAnimeIE(InfoExtractor):
+
+ _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+ IE_NAME = u'AddAnime'
+ _TEST = {
+ u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+ u'file': u'24MR3YO5SAS9.flv',
+ u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+ u'info_dict': {
+ u"description": u"One Piece 606",
+ u"title": u"One Piece 606"
+ }
+ }
+
+ def _real_extract(self, url):
+ try:
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+ webpage = self._download_webpage(url, video_id)
+ except ExtractorError as ee:
+ if not isinstance(ee.cause, compat_HTTPError):
+ raise
+
+ redir_webpage = ee.cause.read().decode('utf-8')
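+ action = self._search_regex(
+ r'<form id="challenge-form" action="([^"]+)"',
+ redir_webpage, u'Redirect form')
+ vc = self._search_regex(
+ r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
+ redir_webpage, u'redirect vc value')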
+ av = re.search(
+ r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
+ redir_webpage)
+ if av is None:
+ raise ExtractorError(u'Cannot find redirect math task')
+ av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
+
+ parsed_url = compat_urllib_parse_urlparse(url)
+ av_val = av_res + len(parsed_url.netloc)
+ confirm_url = (
+ parsed_url.scheme + u'://' + parsed_url.netloc +
+ action + '?' +
+ compat_urllib_parse.urlencode({
+ 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
+ self._download_webpage(
+ confirm_url, video_id,
+ note=u'Confirming after redirect')
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
+ webpage, u'video file URL')
+ video_title = self._og_search_title(webpage)
+ video_description = self._og_search_description(webpage)
+
+ return {
+ '_type': 'video',
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'title': video_title,
+ 'description': video_description
+ }
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
new file mode 100644
index 0000000000..8b191c1963
--- /dev/null
+++ b/youtube_dl/extractor/appletrailers.py
@@ -0,0 +1,166 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _TEST = {
+ u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
+ u"playlist": [
+ {
+ u"file": u"manofsteel-trailer4.mov",
+ u"md5": u"11874af099d480cc09e103b189805d5f",
+ u"info_dict": {
+ u"duration": 111,
+ u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
+ u"title": u"Trailer 4",
+ u"upload_date": u"20130523",
+ u"uploader_id": u"wb",
+ },
+ },
+ {
+ u"file": u"manofsteel-trailer3.mov",
+ u"md5": u"07a0a262aae5afe68120eed61137ab34",
+ u"info_dict": {
+ u"duration": 182,
+ u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
+ u"title": u"Trailer 3",
+ u"upload_date": u"20130417",
+ u"uploader_id": u"wb",
+ },
+ },
+ {
+ u"file": u"manofsteel-trailer.mov",
+ u"md5": u"e401fde0813008e3307e54b6f384cff1",
+ u"info_dict": {
+ u"duration": 148,
+ u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
+ u"title": u"Trailer",
+ u"upload_date": u"20121212",
+ u"uploader_id": u"wb",
+ },
+ },
+ {
+ u"file": u"manofsteel-teaser.mov",
+ u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+ u"info_dict": {
+ u"duration": 93,
+ u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
+ u"title": u"Teaser",
+ u"upload_date": u"20120721",
+ u"uploader_id": u"wb",
+ },
+ }
+ ]
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ movie = mobj.group('movie')
+ uploader_id = mobj.group('company')
+
+ playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+ playlist_snippet = self._download_webpage(playlist_url, movie)
+ playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+ playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+
+ size_cache = {}
+
+ doc = xml.etree.ElementTree.fromstring(playlist_html)
+ playlist = []
+ for li in doc.findall('./div/ul/li'):
+ title = li.find('.//h3').text
+ video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+ thumbnail = li.find('.//img').attrib['src']
+
+ date_el = li.find('.//p')
+ upload_date = None
+ m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
+ if m:
+ upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
+ runtime_el = date_el.find('./br')
+ m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+ duration = None
+ if m:
+ duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+ formats = []
+ for formats_el in li.findall('.//a'):
+ if formats_el.attrib['class'] != 'OverlayPanel':
+ continue
+ target = formats_el.attrib['target']
+
+ format_code = formats_el.text
+ if 'Automatic' in format_code:
+ continue
+
+ size_q = formats_el.attrib['href']
+ size_id = size_q.rpartition('#videos-')[2]
+ if size_id not in size_cache:
+ size_url = url + size_q
+ sizepage_html = self._download_webpage(
+ size_url, movie,
+ note=u'Downloading size info %s' % size_id,
+ errnote=u'Error while downloading size info %s' % size_id,
+ )
+ _doc = xml.etree.ElementTree.fromstring(sizepage_html)
+ size_cache[size_id] = _doc
+
+ sizepage_doc = size_cache[size_id]
+ links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
+ for vid_a in links:
+ href = vid_a.get('href')
+ if not href.endswith(target):
+ continue
+ detail_q = href.partition('#')[0]
+ detail_url = url + '/' + detail_q
+
+ m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
+ detail_id = m.group('detail_id')
+
+ detail_html = self._download_webpage(
+ detail_url, movie,
+ note=u'Downloading detail %s %s' % (detail_id, size_id),
+ errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
+ )
+ detail_doc = xml.etree.ElementTree.fromstring(detail_html)
+ movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
+ assert movie_link_el.get('class') == 'movieLink'
+ movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
+ ext = determine_ext(movie_link)
+ assert ext == 'mov'
+
+ formats.append({
+ 'format': format_code,
+ 'ext': ext,
+ 'url': movie_link,
+ })
+
+ info = {
+ '_type': 'video',
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'user_agent': 'QuickTime compatible (youtube-dl)',
+ }
+ # TODO: Remove when #980 has been merged
+ info['url'] = formats[-1]['url']
+ info['ext'] = formats[-1]['ext']
+
+ playlist.append(info)
+
+ return {
+ '_type': 'playlist',
+ 'id': movie,
+ 'entries': playlist,
+ }
diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py
index 4c8a8af091..dc3a8d47d1 100644
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
_TEST ={
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
- u'file': u'93440716.mp4',
- u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+ u'file': u'93440716.flv',
+ u'md5': u'e59995ac63d0457783ea05f93f12a866',
u'info_dict': {
u'title': u'网事知多少 第32期:车怒',
},
diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
new file mode 100644
index 0000000000..50832217a8
--- /dev/null
+++ b/youtube_dl/extractor/canalc2.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+
+class Canalc2IE(InfoExtractor):
+ _IE_NAME = 'canalc2.tv'
+ _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+
+ _TEST = {
+ u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+ u'file': u'12163.mp4',
+ u'md5': u'060158428b650f896c542dfbb3d6487f',
+ u'info_dict': {
+ u'title': u'Terrasses du Numérique'
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = re.match(self._VALID_URL, url).group(1)
+ webpage = self._download_webpage(url, video_id)
+ file_name = self._search_regex(
+ r"so\.addVariable\('file','(.*?)'\);",
+ webpage, 'file name')
+ video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+
+ title = self._html_search_regex(
+ r'class="evenement8">(.*?)', webpage, u'title')
+
+ return {'id': video_id,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'title': title,
+ }
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 3b1c888762..1f02519a01 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -5,7 +5,7 @@
from ..utils import unified_strdate
class CanalplusIE(InfoExtractor):
- _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P\d+)'
+ _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P\d+)'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
IE_NAME = u'canalplus.fr'
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
new file mode 100644
index 0000000000..a79f881cd9
--- /dev/null
+++ b/youtube_dl/extractor/cnn.py
@@ -0,0 +1,58 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class CNNIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+
+ _TESTS = [{
+ u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+ u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
+ u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
+ u'info_dict': {
+ u'title': u'Nadal wins 8th French Open title',
+ u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+ },
+ },
+ {
+ u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
+ u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
+ u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
+ u"info_dict": {
+ u"title": "Student's epic speech stuns new freshmen",
+ u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ path = mobj.group('path')
+ page_title = mobj.group('title')
+ info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
+ info_xml = self._download_webpage(info_url, page_title)
+ info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+
+ formats = []
+ for f in info.findall('files/file'):
+ mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
+ if mf is not None:
+ formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
+ formats = sorted(formats)
+ (_,_,_, video_path) = formats[-1]
+ video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
+
+ thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
+ thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
+
+ return {'id': info.attrib['id'],
+ 'title': info.find('headline').text,
+ 'url': video_url,
+ 'ext': determine_ext(video_url),
+ 'thumbnail': thumbnails[-1][1],
+ 'thumbnails': thumbs_dict,
+ 'description': info.find('description').text,
+ }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 52c4483c9e..77726ee243 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -114,6 +114,11 @@ def _real_extract(self, url):
"""Real extraction process. Redefine in subclasses."""
pass
+ @classmethod
+ def ie_key(cls):
+ """A string for getting the InfoExtractor with get_info_extractor"""
+ return cls.__name__[:-2]
+
@property
def IE_NAME(self):
return type(self).__name__[:-2]
@@ -129,7 +134,7 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
- raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
+ raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
""" Returns a tuple (page content as string, URL handle) """
@@ -140,12 +145,17 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
content_type = urlh.headers.get('Content-Type', '')
+ webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
if m:
encoding = m.group(1)
else:
- encoding = 'utf-8'
- webpage_bytes = urlh.read()
+ m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+ webpage_bytes[:1024])
+ if m:
+ encoding = m.group(1).decode('ascii')
+ else:
+ encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False):
try:
url = url_or_request.get_full_url()
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 003b1d8c3e..f7dffd4cce 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -37,14 +37,14 @@ def _get_available_subtitles(self, video_id):
class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
"""Information Extractor for Dailymotion"""
- _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
+ _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
IE_NAME = u'dailymotion'
_TEST = {
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
- u"uploader": u"Alex and Van .",
+ u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
@@ -56,6 +56,7 @@ def _real_extract(self, url):
video_id = mobj.group(1).split('_')[0].split('?')[0]
video_extension = 'mp4'
+ url = 'http://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url)
@@ -78,7 +79,8 @@ def _real_extract(self, url):
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page')
- info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
+ info = self._search_regex(r'var info = ({.*?}),$', embed_page,
+ 'video info', flags=re.MULTILINE)
info = json.loads(info)
# TODO: support choosing qualities
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
new file mode 100644
index 0000000000..a804e83bdc
--- /dev/null
+++ b/youtube_dl/extractor/daum.py
@@ -0,0 +1,74 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+ determine_ext,
+)
+
+
+class DaumIE(InfoExtractor):
+ _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
+ IE_NAME = u'daum.net'
+
+ _TEST = {
+ u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+ u'file': u'52554690.mp4',
+ u'info_dict': {
+ u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
+ u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
+ u'upload_date': u'20130831',
+ u'duration': 3868,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+ canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
+ webpage = self._download_webpage(canonical_url, video_id)
+ full_id = self._search_regex(r' ', webpage, re.DOTALL)
if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.')
@@ -151,7 +158,7 @@ def _real_extract(self, url):
mobj = re.search(r'.*?.*?.+)'
+ _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = u'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@@ -41,7 +41,11 @@ def _find_video_id(self, webpage):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name_or_id = mobj.group('name_or_id')
+ page_type = mobj.group('type')
webpage = self._download_webpage(url, name_or_id)
+ if page_type == 'articles':
+ video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+ return self.url_result(video_url, ie='IGN')
video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE,
@@ -68,7 +72,7 @@ def _get_video_info(self, video_id):
class OneUPIE(IGNIE):
"""Extractor for 1up.com, it uses the ign videos system."""
- _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+ _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
IE_NAME = '1up.com'
_DESCRIPTION_RE = r'(.+?)
'
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py
index 8537ba5843..445d465017 100644
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -21,8 +21,10 @@ def _real_extract(self, url):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
- gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+ title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+ surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+ gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
+ gcid = gcids[-1]
video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
video_id, u'Downloading video url info')
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index e38dc98b4c..e537648ffb 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -122,7 +122,7 @@ def _real_extract(self, url):
video_title = self._html_search_regex(r'(?im)(.*) - Video ', webpage, u'title')
description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
- r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+ r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, u'uploader nickname', fatal=False)
return {
diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py
new file mode 100644
index 0000000000..449138b569
--- /dev/null
+++ b/youtube_dl/extractor/metacritic.py
@@ -0,0 +1,55 @@
+import re
+import xml.etree.ElementTree
+import operator
+
+from .common import InfoExtractor
+
+
+class MetacriticIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
+
+ _TEST = {
+ u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
+ u'file': u'3698222.mp4',
+ u'info_dict': {
+ u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
+ u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
+ u'duration': 221,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ # The xml is not well formatted, there are raw '&'
+ info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
+ video_id, u'Downloading info xml').replace('&', '&amp;')
+ info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+
+ clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
+ formats = []
+ for videoFile in clip.findall('httpURI/videoFile'):
+ rate_str = videoFile.find('rate').text
+ video_url = videoFile.find('filePath').text
+ formats.append({
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': rate_str,
+ 'rate': int(rate_str),
+ })
+ formats.sort(key=operator.itemgetter('rate'))
+
+ description = self._html_search_regex(r'Description: (.*?)
',
+ webpage, u'description', flags=re.DOTALL)
+
+ info = {
+ 'id': video_id,
+ 'title': clip.find('title').text,
+ 'formats': formats,
+ 'description': description,
+ 'duration': int(clip.find('duration').text),
+ }
+ # TODO: Remove when #980 has been merged
+ info.update(formats[-1])
+ return info
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
new file mode 100644
index 0000000000..52be9232fe
--- /dev/null
+++ b/youtube_dl/extractor/mit.py
@@ -0,0 +1,74 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+ IE_NAME = u'techtv.mit.edu'
+ _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
+
+ _TEST = {
+ u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+ u'file': u'25418.mp4',
+ u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
+ u'info_dict': {
+ u'title': u'MIT DNA Learning Center Set',
+ u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ raw_page = self._download_webpage(
+ 'http://techtv.mit.edu/videos/%s' % video_id, video_id)
+ clean_page = re.compile(u'', re.S).sub(u'', raw_page)
+
+ base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
+ raw_page, u'base url')
+ formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
+ u'video formats')
+ formats = json.loads(formats_json)
+ formats = sorted(formats, key=lambda f: f['bitrate'])
+
+ title = get_element_by_id('edit-title', clean_page)
+ description = clean_html(get_element_by_id('edit-description', clean_page))
+ thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
+ raw_page, u'thumbnail', flags=re.DOTALL)
+
+ return {'id': video_id,
+ 'title': title,
+ 'url': base_url + formats[-1]['url'].replace('mp4:', ''),
+ 'ext': 'mp4',
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
+
+
+class MITIE(TechTVMITIE):
+ IE_NAME = u'video.mit.edu'
+ _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
+
+ _TEST = {
+ u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
+ u'file': u'21783.mp4',
+ u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
+ u'info_dict': {
+ u'title': u'The Government is Profiling You',
+ u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ page_title = mobj.group('title')
+ webpage = self._download_webpage(url, page_title)
+ self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
+ embed_url = self._search_regex(r'