From 9cf98a2bcc9cae6bb308b42c0da3587b7d4115f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 3 Jan 2013 21:05:04 +0100 Subject: [PATCH 1/3] Allow downloading videos with other characters in their titles Especially html entities --- youtube_dl/InfoExtractors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index d040eec828..f72defdf27 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3694,9 +3694,10 @@ def _real_extract(self, url): videourl = 'http://store.steampowered.com/video/%s/' % gameID webpage = self._download_webpage(videourl, gameID) mweb = re.finditer(urlRE, webpage) - namesRE = r'(?P<videoName>[\w:/\.\?=\+\s-]+)' + namesRE = r'(?P<videoName>.+)' titles = list(re.finditer(namesRE, webpage)) videos = [] + unescaper = compat_html_parser.HTMLParser() for vid,vtitle in zip(mweb,titles): video_id = vid.group('videoID') title = vtitle.group('videoName') @@ -3707,7 +3708,7 @@ def _real_extract(self, url): 'id':video_id, 'url':video_url, 'ext': 'flv', - 'title': title + 'title': unescaper.unescape(title) } videos.append(info) return videos From 5e9d042d8f5dd3296c8a4fd7f538a22874c38324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 3 Jan 2013 23:51:48 +0100 Subject: [PATCH 2/3] steamIE follow @phihag suggestions --- youtube_dl/InfoExtractors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index f72defdf27..44516fbdf8 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3694,8 +3694,8 @@ def _real_extract(self, url): videourl = 'http://store.steampowered.com/video/%s/' % gameID webpage = self._download_webpage(videourl, gameID) mweb = re.finditer(urlRE, webpage) - namesRE = r'(?P<videoName>.+)' - titles = list(re.finditer(namesRE, webpage)) + namesRE = r'(?P<videoName>.+?)' + titles = 
re.finditer(namesRE, webpage) videos = [] unescaper = compat_html_parser.HTMLParser() for vid,vtitle in zip(mweb,titles): @@ -3708,7 +3708,7 @@ def _real_extract(self, url): 'id':video_id, 'url':video_url, 'ext': 'flv', - 'title': unescaper.unescape(title) + 'title': unescapeHTML(title) } videos.append(info) return videos From 26714799c98f70f46e2f1da427ae8a5008e26521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 3 Jan 2013 23:56:02 +0100 Subject: [PATCH 3/3] steamIE remove the HTMLparser object --- youtube_dl/InfoExtractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 44516fbdf8..e380f62a1f 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3697,7 +3697,6 @@ def _real_extract(self, url): namesRE = r'(?P<videoName>.+?)' titles = re.finditer(namesRE, webpage) videos = [] - unescaper = compat_html_parser.HTMLParser() for vid,vtitle in zip(mweb,titles): video_id = vid.group('videoID') title = vtitle.group('videoName')