From 336c3a69bd198130e2f65f14dfc83383fec7c5e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 6 Dec 2013 13:22:04 +0100
Subject: [PATCH] [youtube] Extract like and dislike count (#1895)

---
Notes (this area is ignored by `git am`; it is not part of the commit message):

  * The hand-written urlopen/decode sequence for the watch page, and the
    report_video_webpage_download() helper that announced it, are replaced
    by a single self._download_webpage(url, video_id) call.
  * A local helper _extract_count(klass) searches the watch page for
    class="<klass>">DIGITS, strips thousands separators (',') and returns
    an int; it returns None when the regex search yields None
    (the search is made with fatal=False).
  * Each result dict gains 'like_count' and 'dislike_count', taken from the
    'likes-count' and 'dislikes-count' elements respectively.

  NOTE(review): line breaks and leading indentation were restored from the
  hunk headers / diffstat and from Python block structure; verify against
  blob 7fff761bd before applying.

 youtube_dl/extractor/youtube.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7fff761bd..52c8e7d04 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -388,10 +388,6 @@ def __init__(self, *args, **kwargs):
         super(YoutubeIE, self).__init__(*args, **kwargs)
         self._player_cache = {}
 
-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self.to_screen(u'%s: Downloading video webpage' % video_id)
-
     def report_video_info_webpage_download(self, video_id):
         """Report attempt to download video info webpage."""
         self.to_screen(u'%s: Downloading video info webpage' % video_id)
@@ -1258,15 +1254,8 @@ def _real_extract(self, url):
         video_id = self._extract_id(url)
 
         # Get video webpage
-        self.report_video_webpage_download(video_id)
         url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
-        request = compat_urllib_request.Request(url)
-        try:
-            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
-
-        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
+        video_webpage = self._download_webpage(url, video_id)
 
         # Attempt to extract SWF player URL
         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@@ -1383,6 +1372,14 @@ def _real_extract(self, url):
             else:
                 video_description = u''
 
+        def _extract_count(klass):
+            count = self._search_regex(r'class="%s">([\d,]+)' % re.escape(klass), video_webpage, klass, fatal=False)
+            if count is not None:
+                return int(count.replace(',', ''))
+            return None
+        like_count = _extract_count(u'likes-count')
+        dislike_count = _extract_count(u'dislikes-count')
+
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
 
@@ -1515,6 +1512,8 @@ def _real_extract(self, url):
                 'annotations': video_annotations,
                 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
                 'view_count': view_count,
+                'like_count': like_count,
+                'dislike_count': dislike_count,
             })
 
         return results