From 42939b6129833e3fb9f4c22e4e9f4056df193af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 30 Nov 2014 00:03:59 +0100 Subject: [PATCH] [youtube] Use a cookie for setting the language This way, we don't have to do an additional request --- youtube_dl/extractor/common.py | 6 ++++++ youtube_dl/extractor/youtube.py | 23 +++++------------------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3e84d0e62..e80a2dad0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -13,6 +13,7 @@ import xml.etree.ElementTree from ..compat import ( + compat_cookiejar, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, @@ -817,6 +818,11 @@ def _float(self, v, name, fatal=False, **kwargs): self._downloader.report_warning(msg) return res + def _set_cookie(self, domain, name, value, expire_time=None): + cookie = compat_cookiejar.Cookie(0, name, value, None, None, domain, None, + None, '/', True, False, expire_time, '', None, None, None) + self._downloader.cookiejar.set_cookie(cookie) + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 93e9b7d6d..96b49fed1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -7,6 +7,7 @@ import json import os.path import re +import time import traceback from .common import InfoExtractor, SearchInfoExtractor @@ -38,16 +39,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor' - _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False def 
_set_language(self): - return bool(self._download_webpage( - self._LANG_URL, None, - note='Setting language', errnote='unable to set language', - fatal=False)) + self._set_cookie('.youtube.com', 'PREF', 'f1=50000000&hl=en', + # YouTube sets the expire time to about two months + expire_time=time.time() + 60*24*3600) def _login(self): """ @@ -178,9 +177,7 @@ def _login(self): def _real_initialize(self): if self._downloader is None: return - if self._get_login_info()[0] is not None: - if not self._set_language(): - return + self._set_language() if not self._login(): return @@ -667,16 +664,6 @@ def _real_extract(self, url): # Get video webpage url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id - pref_cookies = [ - c for c in self._downloader.cookiejar - if c.domain == '.youtube.com' and c.name == 'PREF'] - for pc in pref_cookies: - if 'hl=' in pc.value: - pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value) - else: - if pc.value: - pc.value += '&' - pc.value += 'hl=en' video_webpage = self._download_webpage(url, video_id) # Attempt to extract SWF player URL