From b5a6d408181c118bf51382f486a2492643ed74ec Mon Sep 17 00:00:00 2001
From: huohuarong <huohuarong@gmail.com>
Date: Mon, 5 Aug 2013 22:51:54 +0800
Subject: [PATCH] [sohu] Fix title parsing; fetch JSON via _download_webpage

---
 youtube_dl/extractor/sohu.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index cf0ab5478..cd049b6f0 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -27,10 +27,10 @@ def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
-        pattern = r'<h1 id="video-title">\n*?(.+?)\n*?</h1>'
+        pattern = r'<title>(.+?)</title>'
         compiled = re.compile(pattern, re.DOTALL)
-        title = self._search_regex(compiled, webpage, u'video title').strip('\t\n')
-        title = clean_html(title)
+        title = self._search_regex(compiled, webpage, u'video title')
+        title = clean_html(title).split('-')[0].strip()
         pattern = re.compile(r'var vid="(\d+)"')
         result = re.search(pattern, webpage)
         if not result:
@@ -41,7 +41,8 @@ def _real_extract(self, url):
         base_url_1 = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
         url_1 = base_url_1 + vid
         logging.info('json url: %s' % url_1)
-        json_1 = json.loads(urllib2.urlopen(url_1).read())
+        webpage = self._download_webpage(url_1, vid)
+        json_1 = json.loads(webpage)
         # get the highest definition video vid and json infomation.
         vids = []
         qualities = ('oriVid', 'superVid', 'highVid', 'norVid')