[collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)

Uses a new helper method in InfoExtractor: _download_xml
2025-02-18 19:20:43 +00:00 · 2013-11-24 14:59:19 +01:00 · 2013-11-24 14:59:19 +01:00 · 267ed0c5d3
commit 267ed0c5d3
parent f459d17018
2 changed files with 8 additions and 5 deletions
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
@ -46,11 +45,10 @@ def _real_extract(self, url):
        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
-        metaXml = self._download_webpage(xmlUrl, video_id,
+        mdoc = self._download_xml(xmlUrl, video_id,
                                         u'Downloading info XML',
                                         u'Unable to download video info XML')
-        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            youtubeIdNode = videoNode.find('./youtubeID')
@ -65,11 +63,10 @@ def _real_extract(self, url):
        if next_url.endswith(u'manifest.f4m'):
            manifest_url = next_url + '?hdcore=2.10.3'
-            manifestXml = self._download_webpage(manifest_url, video_id,
+            adoc = self._download_xml(manifest_url, video_id,
                                         u'Downloading XML manifest',
                                         u'Unable to download video info XML')
-            adoc = xml.etree.ElementTree.fromstring(manifestXml)
            try:
                video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
            except IndexError:
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -4,6 +4,7 @@
 import socket
 import sys
 import netrc
+import xml.etree.ElementTree
 from ..utils import (
    compat_http_client,
@ -208,6 +209,11 @@ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
        """ Returns the data of the page as a string """
        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
    def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to download XML'):
        """Download a document and return it parsed as XML.

        The document is fetched with self._download_webpage; url_or_request,
        video_id, note and errnote are forwarded to it unchanged.

        Returns the root node as an xml.etree.ElementTree.Element.
        Malformed XML is not caught here, so the parser's error propagates
        to the caller.
        """
        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
        # Hand the parser UTF-8 bytes instead of the text string itself;
        # presumably needed because some Python versions choke on text
        # strings containing non-ASCII characters -- TODO confirm.
        # NOTE(review): a document whose XML declaration names another
        # encoding may be mis-decoded after this re-encoding -- verify.
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
    def to_screen(self, msg):
        """Print msg to screen, prefixing it with '[ie_name]'"""
        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))