[YoutubeDL] Allow filtering by properties (Fixes #4584)

This commit is contained in:
Philipp Hagemeister 2015-01-23 00:04:05 +01:00
parent 50789175ed
commit 083c9df93b
3 changed files with 120 additions and 0 deletions

View File

@ -281,6 +281,61 @@ def test_youtube_format_selection(self):
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id) self.assertEqual(downloaded['format_id'], f1id)
def test_format_filtering(self):
formats = [
{'format_id': 'A', 'filesize': 500, 'width': 1000},
{'format_id': 'B', 'filesize': 1000, 'width': 500},
{'format_id': 'C', 'filesize': 1000, 'width': 400},
{'format_id': 'D', 'filesize': 2000, 'width': 600},
{'format_id': 'E', 'filesize': 3000},
{'format_id': 'F'},
{'format_id': 'G', 'filesize': 1000000},
]
for f in formats:
f['url'] = 'http://_/'
f['ext'] = 'unknown'
info_dict = _make_result(formats)
ydl = YDL({'format': 'best[filesize<3000]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'D')
ydl = YDL({'format': 'best[filesize<=3000]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'E')
ydl = YDL({'format': 'best[filesize <= ? 3000]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'F')
ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'B')
ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'C')
ydl = YDL({'format': '[filesize>?1]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
ydl = YDL({'format': '[filesize<1M]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'E')
ydl = YDL({'format': '[filesize<1MiB]'})
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
def test_add_extra_info(self): def test_add_extra_info(self):
test_dict = { test_dict = {
'extractor': 'Foo', 'extractor': 'Foo',

View File

@ -10,6 +10,7 @@
import itertools import itertools
import json import json
import locale import locale
import operator
import os import os
import platform import platform
import re import re
@ -49,6 +50,7 @@
make_HTTPS_handler, make_HTTPS_handler,
MaxDownloadsReached, MaxDownloadsReached,
PagedList, PagedList,
parse_filesize,
PostProcessingError, PostProcessingError,
platform_name, platform_name,
preferredencoding, preferredencoding,
@ -768,7 +770,59 @@ def _fixup(r):
else: else:
raise Exception('Invalid result type: %s' % result_type) raise Exception('Invalid result type: %s' % result_type)
def _apply_format_filter(self, format_spec, available_formats):
" Returns a tuple of the remaining format_spec and filtered formats "
OPERATORS = {
'<': operator.lt,
'<=': operator.le,
'>': operator.gt,
'>=': operator.ge,
'=': operator.eq,
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*\[
(?P<key>width|height|tbr|abr|vbr|filesize)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
\]$
''' % '|'.join(map(re.escape, OPERATORS.keys())))
m = operator_rex.search(format_spec)
if not m:
raise ValueError('Invalid format specification %r' % format_spec)
try:
comparison_value = int(m.group('value'))
except ValueError:
comparison_value = parse_filesize(m.group('value'))
if comparison_value is None:
comparison_value = parse_filesize(m.group('value') + 'B')
if comparison_value is None:
raise ValueError(
'Invalid value %r in format specification %r' % (
m.group('value'), format_spec))
op = OPERATORS[m.group('op')]
def _filter(f):
actual_value = f.get(m.group('key'))
if actual_value is None:
return m.group('none_inclusive')
return op(actual_value, comparison_value)
new_formats = [f for f in available_formats if _filter(f)]
new_format_spec = format_spec[:-len(m.group(0))]
if not new_format_spec:
new_format_spec = 'best'
return (new_format_spec, new_formats)
def select_format(self, format_spec, available_formats): def select_format(self, format_spec, available_formats):
while format_spec.endswith(']'):
format_spec, available_formats = self._apply_format_filter(
format_spec, available_formats)
if not available_formats:
return None
if format_spec == 'best' or format_spec is None: if format_spec == 'best' or format_spec is None:
return available_formats[-1] return available_formats[-1]
elif format_spec == 'worst': elif format_spec == 'worst':

View File

@ -289,6 +289,17 @@ def _hide_login_info(opts):
'extensions aac, m4a, mp3, mp4, ogg, wav, webm. ' 'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
'You can also use the special names "best",' 'You can also use the special names "best",'
' "bestvideo", "bestaudio", "worst". ' ' "bestvideo", "bestaudio", "worst". '
' You can filter the video results by putting a condition in'
' brackets, as in -f "best[height=720]"'
' (or -f "[filesize>10M]"). '
' This works for filesize, height, width, tbr, abr, and vbr'
' and the comparisons <, <=, >, >=, =, != .'
' Formats for which the value is not known are excluded unless you'
' put a question mark (?) after the operator.'
' You can combine format filters, so '
'-f "[height <=? 720][tbr>500]" '
'selects up to 720p videos (or videos where the height is not '
'known) with a bitrate of at least 500 KBit/s.'
' By default, youtube-dl will pick the best quality.' ' By default, youtube-dl will pick the best quality.'
' Use commas to download multiple audio formats, such as' ' Use commas to download multiple audio formats, such as'
' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.' ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'