[utils] Improve and test js_to_json

This commit is contained in:
Philipp Hagemeister 2014-09-30 11:12:59 +02:00
parent 410f3e73ab
commit e7b6d12254
3 changed files with 38 additions and 25 deletions

View File

@@ -332,14 +332,28 @@ def test_escape_url(self):
)
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
def test_js_to_json_realworld(self):
    """Check js_to_json on object literals shaped like real player configs."""
    # Single-quoted keys and values must come out double-quoted, with the
    # surrounding layout (newlines, braces) left exactly as it was.
    inp = '''{
'clip':{'provider':'pseudo'}
}'''
    self.assertEqual(js_to_json(inp), '''{
"clip":{"provider":"pseudo"}
}''')
    # The converted text must also be loadable by the json module.
    json.loads(js_to_json(inp))

    # The null keyword must stay a bare keyword, not become the string "null".
    inp = '''{
'playlist':[{'controls':{'all':null}}]
}'''
    self.assertEqual(js_to_json(inp), '''{
"playlist":[{"controls":{"all":null}}]
}''')
def test_js_to_json_edgecases(self):
    """Check escape handling and keyword passthrough in js_to_json."""
    # Unquoted key; single-quoted value mixing \' and \\ escapes with a
    # bare double quote.
    converted = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
    parsed = json.loads(converted)
    self.assertEqual(parsed, {"abc_def": "1'\\2\\'3\"4"})

    # Input that is already valid JSON must still parse to the same value.
    self.assertEqual(json.loads(js_to_json('{"abc": true}')), {'abc': True})
if __name__ == '__main__':
    # Hand control to unittest when this file is executed as a script.
    unittest.main()

View File

@@ -334,7 +334,11 @@ def _download_json(self, url_or_request, video_id,
try:
return json.loads(json_string)
except ValueError as ve:
raise ExtractorError('Failed to download JSON', cause=ve)
errmsg = '%s: Failed to parse JSON ' % video_id
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
self.report_warning(errmsg + str(ve))
def report_warning(self, msg, video_id=None):
idstr = '' if video_id is None else '%s: ' % video_id

View File

@@ -1580,29 +1580,24 @@ def strip_jsonp(code):
def js_to_json(code):
    r"""Convert a JavaScript object/array literal into JSON-style text.

    The input is rewritten in a single pass over these tokens:

    * double-quoted strings are copied unchanged;
    * single-quoted strings are re-quoted with double quotes: ``\'`` becomes
      ``'``, a bare ``"`` becomes ``\"`` and ``\\`` is kept;
    * ``true``, ``false`` and ``null`` are copied unchanged;
    * any other identifier (e.g. an unquoted object key) is wrapped in
      double quotes;
    * a comma that is followed only by whitespace and a closing ``]`` is
      dropped.

    Everything else (numbers, punctuation, whitespace) is copied verbatim.
    Inside a quoted string only ``\\`` and the escaped form of the
    enclosing quote are recognised; a literal containing any other
    backslash escape is not treated as a string token.

    :param code: JavaScript source text of the literal.
    :returns: the converted text; it is not validated as JSON here.
    """
    # How each special sequence inside a single-quoted literal is spelled
    # once the literal is wrapped in double quotes instead.
    single_quoted_escapes = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        v = m.group(0)
        if v == ',':
            # Only commas directly before a closing ']' match the pattern.
            return ''
        if v in ('true', 'false', 'null') or v.startswith('"'):
            return v
        if v.startswith("'"):
            v = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: single_quoted_escapes[esc.group(0)],
                v[1:-1])
        return '"%s"' % v

    # String literals are matched before anything else, so identifiers and
    # commas inside them are never rewritten.  The string patterns are
    # written as unrolled loops (every repetition must consume an escape),
    # which keeps matching linear even when a quote is never closed.
    return re.sub(r'''(?x)
        "[^"\\]*(?:(?:\\\\|\\")[^"\\]*)*"|
        '[^'\\]*(?:(?:\\\\|\\')[^'\\]*)*'|
        [a-zA-Z_][a-zA-Z_0-9]*|
        ,(?=\s*\])
        ''', fix_kv, code)