[generic] Fix rss under Python 2.x and move test to extractor

This commit is contained in:
Philipp Hagemeister 2014-08-25 18:03:01 +02:00
parent 829476b80a
commit 0990305d2a
5 changed files with 31 additions and 12 deletions

View File

@ -102,7 +102,10 @@ def expect_info_dict(self, expected_dict, got_dict):
match_rex = re.compile(match_str)
self.assertTrue(
isinstance(got, compat_str) and match_rex.match(got),
isinstance(got, compat_str),
'Expected a %r object, but got %r' % (compat_str, type(got)))
self.assertTrue(
match_rex.match(got),
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
elif isinstance(expected, type):
got = got_dict.get(info_field)

View File

@ -7,6 +7,7 @@
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import (
assertGreaterEqual,
get_params,
gettestcases,
expect_info_dict,
@ -136,7 +137,8 @@ def try_rm_tcs_files():
self.assertEqual(res_dict['_type'], 'playlist')
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
if 'playlist_mincount' in test_case:
self.assertGreaterEqual(
assertGreaterEqual(
self,
len(res_dict['entries']),
test_case['playlist_mincount'],
'Expected at least %d in playlist %s, but got only %d' % (

View File

@ -310,15 +310,6 @@ def test_multiple_brightcove_videos(self):
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
self.assertEqual(len(result['entries']), 3)
def test_generic_rss_feed(self):
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)
def test_ted_playlist(self):
dl = FakeYDL()
ie = TEDIE(dl)

View File

@ -341,6 +341,16 @@ class GenericIE(InfoExtractor):
'uploader': 'www.handjobhub.com',
'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
}
},
# RSS feed
{
'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
'info_dict': {
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
'title': 'Zero Punctuation',
'description': 're:'
},
'playlist_mincount': 11,
}
]

View File

@ -1458,6 +1458,12 @@ def urlencode_postdata(*args, **kargs):
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
try:
etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError: # Python <=2.6
etree_iter = lambda n: n.findall('.//*')
def parse_xml(s):
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
def doctype(self, name, pubid, system):
@ -1465,7 +1471,14 @@ def doctype(self, name, pubid, system):
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
# Fix up XML parser in Python 2.x
if sys.version_info < (3, 0):
for n in etree_iter(tree):
if n.text is not None:
if not isinstance(n.text, compat_str):
n.text = n.text.decode('utf-8')
return tree
if sys.version_info < (3, 0) and sys.platform == 'win32':