From bcf89ce62cb4f6ab8802ab6aef01c3afaefc0075 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Mon, 10 Mar 2014 17:31:32 +0100
Subject: [PATCH] [generic] Suppress warning about doctypes in RSS parser

---
 youtube_dl/extractor/generic.py |  4 ++--
 youtube_dl/utils.py             | 11 +++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7a2e5dee0..7666cf207 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,7 +4,6 @@
 
 import os
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
@@ -17,6 +16,7 @@
 
     ExtractorError,
     HEADRequest,
+    parse_xml,
     smuggle_url,
     unescapeHTML,
     unified_strdate,
@@ -274,7 +274,7 @@ def _real_extract(self, url):
 
         # Is it an RSS feed?
         try:
-            doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
+            doc = parse_xml(webpage)
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
         except compat_xml_parse_error:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d4abd4031..3943cc9c5 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -22,6 +22,7 @@
 import subprocess
 import sys
 import traceback
+import xml.etree.ElementTree
 import zlib
 
 try:
@@ -1267,3 +1268,13 @@ def fixup(url):
 
 def urlencode_postdata(*args, **kargs):
     return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
+
+
+def parse_xml(s):
+    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
+        def doctype(self, name, pubid, system):
+            pass  # Ignore doctypes
+
+    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
+    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
+    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)