[YoutubeDL] format spec: correctly handle dashes and other unused operators

'mp4-baseline-16x9' must be handled as a single string, but the '-' was treated as an operator.
This commit is contained in:
Jaime Marquínez Ferrándiz 2015-08-04 22:29:23 +02:00
parent a346b1ff57
commit 232541df44
2 changed files with 38 additions and 1 deletions

View File

@ -105,6 +105,7 @@ def test_prefer_free_formats(self):
def test_format_selection(self): def test_format_selection(self):
formats = [ formats = [
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
{'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL}, {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
@ -136,6 +137,11 @@ def test_format_selection(self):
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '35') self.assertEqual(downloaded['format_id'], '35')
ydl = YDL({'format': 'example-with-dashes'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'example-with-dashes')
def test_format_selection_audio(self): def test_format_selection_audio(self):
formats = [ formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},

View File

@ -933,6 +933,37 @@ def _parse_filter(tokens):
else: else:
filter_parts.append(string) filter_parts.append(string)
def _remove_unused_ops(tokens):
# Remove operators that we don't use and join them with the sourrounding strings
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
for type, string, start, end, line in tokens:
if type == tokenize.OP and string == '[':
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string, start, end, line
# everything inside brackets will be handled by _parse_filter
for type, string, start, end, line in tokens:
yield type, string, start, end, line
if type == tokenize.OP and string == ']':
break
elif type == tokenize.OP and string in ALLOWED_OPS:
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string, start, end, line
elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
if not last_string:
last_string = string
last_start = start
last_end = end
else:
last_string += string
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
selectors = [] selectors = []
current_selector = None current_selector = None
@ -1111,7 +1142,7 @@ def final_selector(formats):
stream = io.BytesIO(format_spec.encode('utf-8')) stream = io.BytesIO(format_spec.encode('utf-8'))
try: try:
tokens = list(compat_tokenize_tokenize(stream.readline)) tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
except tokenize.TokenError: except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))