Improve geo bypass mechanism

* Rename options to preffixly match with --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction
This commit is contained in:
Sergey M․ 2017-02-19 03:53:23 +07:00 committed by Sergey M
parent 0a840f584c
commit 4248dad92b
13 changed files with 72 additions and 31 deletions

View File

@ -323,10 +323,15 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
_BYPASS_GEO attribute may be set to False in order to disable
_GEO_BYPASS attribute may be set to False in order to disable
geo restriction bypass mechanisms for a particular extractor.
Though it won't disable explicit geo restriction bypass based on
country code provided with geo_bypass_country.
country code provided with geo_bypass_country. (experimental)
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
countries for this extractor. One of these countries will be used by
geo restriction bypass mechanism right away in order to bypass
geo restriction, of course, if the mechanism is not disabled. (experimental)
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
@ -335,7 +340,8 @@ class InfoExtractor(object):
_ready = False
_downloader = None
_x_forwarded_for_ip = None
_BYPASS_GEO = True
_GEO_BYPASS = True
_GEO_COUNTRIES = None
_WORKING = True
def __init__(self, downloader=None):
@ -370,14 +376,28 @@ def working(cls):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
if not self._x_forwarded_for_ip:
country_code = self._downloader.params.get('geo_bypass_country', None)
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
self.__initialize_geo_bypass()
if not self._ready:
self._real_initialize()
self._ready = True
def __initialize_geo_bypass(self):
if not self._x_forwarded_for_ip:
country_code = self._downloader.params.get('geo_bypass_country', None)
# If there is no explicit country for geo bypass specified and
# the extractor is known to be geo restricted let's fake IP
# as X-Forwarded-For right away.
if (not country_code and
self._GEO_BYPASS and
self._downloader.params.get('geo_bypass', True) and
self._GEO_COUNTRIES):
country_code = random.choice(self._GEO_COUNTRIES)
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._downloader.params.get('verbose', False):
self._downloader.to_stdout(
'[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
try:
@ -389,16 +409,8 @@ def extract(self, url):
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
return ie_result
except GeoRestrictedError as e:
if (not self._downloader.params.get('geo_bypass_country', None) and
self._BYPASS_GEO and
self._downloader.params.get('geo_bypass', True) and
not self._x_forwarded_for_ip and
e.countries):
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
if self._x_forwarded_for_ip:
self.report_warning(
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
continue
if self.__maybe_fake_ip_and_retry(e.countries):
continue
raise
except ExtractorError:
raise
@ -407,6 +419,19 @@ def extract(self, url):
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries):
if (not self._downloader.params.get('geo_bypass_country', None) and
self._GEO_BYPASS and
self._downloader.params.get('geo_bypass', True) and
not self._x_forwarded_for_ip and
countries):
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
if self._x_forwarded_for_ip:
self.report_warning(
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
return True
return False
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
self._downloader = downloader

View File

@ -20,6 +20,7 @@
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
_GEO_COUNTRIES = ['US', 'CA']
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@ -118,7 +119,7 @@ def _real_extract(self, url):
if isinstance(e.cause, compat_HTTPError):
self.raise_geo_restricted(
msg='Currently unavailable in your country',
countries=['US', 'CA'])
countries=self._GEO_COUNTRIES)
raise
series_id, episode_number = video_id.split('.')

View File

@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
}
}
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_GEO_COUNTRIES = ['US']
_TESTS = [{
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
'info_dict': {
@ -104,7 +105,7 @@ def _real_extract(self, url):
for error in errors:
if error.get('code') == 1002:
self.raise_geo_restricted(
error['message'], countries=['US'])
error['message'], countries=self._GEO_COUNTRIES)
error_message = ', '.join([error['message'] for error in errors])
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
asset_url += '?' + entitlement['uplynkData']['sessionKey']

View File

@ -24,6 +24,7 @@
class ITVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
_GEO_COUNTRIES = ['GB']
_TEST = {
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
'info_dict': {
@ -101,7 +102,8 @@ def _add_sub_element(element, name):
fault_code = xpath_text(resp_env, './/faultcode')
fault_string = xpath_text(resp_env, './/faultstring')
if fault_code == 'InvalidGeoRegion':
self.raise_geo_restricted(msg=fault_string, countries=['GB'])
self.raise_geo_restricted(
msg=fault_string, countries=self._GEO_COUNTRIES)
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)

View File

@ -14,6 +14,7 @@
class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
def _real_extract(self, url):
video_id = self._match_id(url)
@ -93,7 +94,8 @@ def video_id_and_title(idx):
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),

View File

@ -10,6 +10,7 @@
class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
_GEO_COUNTRIES = ['US', 'CA']
_TEST = {
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
'info_dict': {
@ -36,7 +37,7 @@ def _real_extract(self, url):
if 'msg_block_01.png' in webpage:
self.raise_geo_restricted(
msg='This content is not available in your region',
countries=['US', 'CA'])
countries=self._GEO_COUNTRIES)
if 'This video is only available to ODK PLUS members.' in webpage:
raise ExtractorError(

View File

@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
)
''' % '|'.join(list(zip(*_STATIONS))[0])
_GEO_COUNTRIES = ['US']
_TESTS = [
{
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@ -492,7 +494,8 @@ def extract_redirect_urls(info):
message = self._ERRORS.get(
redirect_info['http_code'], redirect_info['message'])
if redirect_info['http_code'] == 403:
self.raise_geo_restricted(msg=message, countries=['US'])
self.raise_geo_restricted(
msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, message), expected=True)

View File

@ -14,7 +14,8 @@
class SRGSSRIE(InfoExtractor):
_VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
_BYPASS_GEO = False
_GEO_BYPASS = False
_GEO_COUNTRIES = ['CH']
_ERRORS = {
'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@ -43,7 +44,8 @@ def get_media_data(self, bu, media_type, media_id):
if media_data.get('block') and media_data['block'] in self._ERRORS:
message = self._ERRORS[media_data['block']]
if media_data['block'] == 'GEOBLOCK':
self.raise_geo_restricted(msg=message, countries=['CH'])
self.raise_geo_restricted(
msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, message), expected=True)

View File

@ -13,6 +13,7 @@
class SVTBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['SE']
def _extract_video(self, video_info, video_id):
formats = []
for vr in video_info['videoReferences']:
@ -39,7 +40,8 @@ def _extract_video(self, video_info, video_id):
})
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
self.raise_geo_restricted(
'This video is only available in Sweden', countries=['SE'])
'This video is only available in Sweden',
countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
subtitles = {}

View File

@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
)
(?P<id>[\da-fA-F]+)
'''
_GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@ -78,7 +79,7 @@ def _real_extract(self, url):
video_url = video['src']
if '/na.mp4' in video_url:
self.raise_geo_restricted(countries=['BG'])
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
uploader = video.get('uploader')

View File

@ -14,7 +14,7 @@
class VGTVIE(XstreamIE):
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
_BYPASS_GEO = False
_GEO_BYPASS = False
_HOST_TO_APPNAME = {
'vgtv.no': 'vgtv',
@ -218,7 +218,8 @@ def _real_extract(self, url):
properties = try_get(
data, lambda x: x['streamConfiguration']['properties'], list)
if properties and 'geoblocked' in properties:
raise self.raise_geo_restricted(countries=['NO'])
raise self.raise_geo_restricted(
countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
self._sort_formats(info['formats'])

View File

@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
_APP_VERSION = '2.2.5.1428709186'
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
_BYPASS_GEO = False
_GEO_BYPASS = False
_NETRC_MACHINE = 'viki'
_token = None

View File

@ -3291,7 +3291,7 @@ def random_ipv4(cls, code):
addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))
return compat_str(socket.inet_ntoa(
compat_struct_pack('!I', random.randint(addr_min, addr_max))))
compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):