From f9934b96145af8ac5dfdcbf684827aeaea9912a7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Apr 2022 01:39:26 +0530 Subject: [PATCH] [cleanup] Mark some compat variables for removal (#2173) Authored by fstirlitz, pukkandan --- test/test_compat.py | 36 -------- test/test_youtube_signature.py | 5 +- yt_dlp/YoutubeDL.py | 16 ++-- yt_dlp/compat.py | 97 ++++++++++----------- yt_dlp/downloader/ism.py | 22 ++--- yt_dlp/extractor/abematv.py | 8 +- yt_dlp/extractor/adobepass.py | 3 +- yt_dlp/extractor/afreecatv.py | 5 +- yt_dlp/extractor/bbc.py | 4 +- yt_dlp/extractor/brightcove.py | 4 +- yt_dlp/extractor/common.py | 11 ++- yt_dlp/extractor/crunchyroll.py | 10 +-- yt_dlp/extractor/generic.py | 6 +- yt_dlp/extractor/microsoftvirtualacademy.py | 9 +- yt_dlp/extractor/mildom.py | 4 +- yt_dlp/extractor/mixcloud.py | 3 +- yt_dlp/extractor/mtv.py | 5 +- yt_dlp/extractor/noz.py | 3 +- yt_dlp/extractor/openload.py | 3 +- yt_dlp/extractor/soundcloud.py | 3 +- yt_dlp/extractor/udemy.py | 3 +- yt_dlp/extractor/vimeo.py | 3 +- yt_dlp/options.py | 21 ++--- yt_dlp/postprocessor/sponskrub.py | 4 +- yt_dlp/utils.py | 44 ++++------ yt_dlp/webvtt.py | 1 - 26 files changed, 134 insertions(+), 199 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index c9bc4d7fb..6cbffd6fe 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,14 +13,10 @@ from yt_dlp.compat import ( compat_getenv, compat_setenv, - compat_etree_Element, compat_etree_fromstring, compat_expanduser, - compat_shlex_split, compat_str, compat_struct_unpack, - compat_urllib_parse_quote, - compat_urllib_parse_quote_plus, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, @@ -55,27 +51,6 @@ def test_all_present(self): dir(yt_dlp.compat))) - set(['unicode_literals']) self.assertEqual(all_names, sorted(present_names)) - def test_compat_urllib_parse_quote(self): - self.assertEqual(compat_urllib_parse_quote('abc def'), 'abc%20def') - self.assertEqual(compat_urllib_parse_quote('/user/abc+def'), '/user/abc%2Bdef') - self.assertEqual(compat_urllib_parse_quote('/user/abc+def', safe='+'), '%2Fuser%2Fabc+def') - self.assertEqual(compat_urllib_parse_quote(''), '') - self.assertEqual(compat_urllib_parse_quote('%'), '%25') - self.assertEqual(compat_urllib_parse_quote('%', safe='%'), '%') - self.assertEqual(compat_urllib_parse_quote('津波'), '%E6%B4%A5%E6%B3%A2') - self.assertEqual( - compat_urllib_parse_quote(''' -%%a''', safe='<>=":%/ \r\n'), - ''' -%%a''') - self.assertEqual( - compat_urllib_parse_quote('''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%25Things%''', safe='% '), - '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%''') - - def test_compat_urllib_parse_quote_plus(self): - self.assertEqual(compat_urllib_parse_quote_plus('abc def'), 'abc+def') - self.assertEqual(compat_urllib_parse_quote_plus('/abc def'), '%2Fabc+def') - def test_compat_urllib_parse_unquote(self): self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') @@ -109,17 +84,6 @@ def test_compat_urllib_parse_urlencode(self): self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') - def test_compat_shlex_split(self): - self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) - self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) - self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) - - def test_compat_etree_Element(self): - try: - compat_etree_Element.items - except AttributeError: - self.fail('compat_etree_Element is not a type') - def test_compat_etree_fromstring(self): xml = ''' diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index bbbba073f..6412acce0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -11,11 +11,12 @@ import io import re import string +import urllib.request from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE from yt_dlp.jsinterp import JSInterpreter -from yt_dlp.compat import compat_str, compat_urlretrieve +from yt_dlp.compat import compat_str _SIG_TESTS = [ ( @@ -147,7 +148,7 @@ def test_func(self): fn = os.path.join(self.TESTDATA_DIR, basename) if not os.path.exists(fn): - compat_urlretrieve(url, fn) + urllib.request.urlretrieve(url, fn) with io.open(fn, encoding='utf-8') as testf: jscode = testf.read() self.assertEqual(sig_func(jscode, sig_input), expected_sig) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c626ea3fd..4bf5a8942 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -26,24 +26,20 @@ import traceback import random import unicodedata +import urllib.request from enum import Enum from string import ascii_letters from .compat import ( - compat_basestring, compat_brotli, compat_get_terminal_size, - compat_kwargs, - compat_numeric_types, compat_os_name, compat_pycrypto_AES, compat_shlex_quote, compat_str, - compat_tokenize_tokenize, compat_urllib_error, compat_urllib_request, - compat_urllib_request_DataHandler, windows_enable_vt_mode, ) from .cookies import load_cookies @@ -682,7 +678,7 @@ def check_deprecated(param, option, suggestion): pp_def = dict(pp_def_raw) when = pp_def.pop('when', 'post_process') self.add_post_processor( - get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), + get_postprocessor(pp_def.pop('key'))(self, **pp_def), when=when) self._setup_opener() @@ -2244,7 +2240,7 @@ def final_selector(ctx): stream = io.BytesIO(format_spec.encode('utf-8')) try: - tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) + tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) @@ -2406,7 +2402,7 @@ def sanitize_string_field(info, string_field): def sanitize_numeric_fields(info): for numeric_field in self._NUMERIC_FIELDS: field = info.get(numeric_field) - if field is None or isinstance(field, compat_numeric_types): + if field is None or isinstance(field, (int, float)): continue report_force_conversion(numeric_field, 'numeric', 'int') info[numeric_field] = int_or_none(field) @@ -3589,7 +3585,7 @@ def list_subtitles(self, video_id, subtitles, name='subtitles'): def urlopen(self, req): """ Start an HTTP download """ - if isinstance(req, compat_basestring): + if isinstance(req, str): req = sanitized_Request(req) return self._opener.open(req, timeout=self._socket_timeout) @@ -3739,7 +3735,7 @@ def _setup_opener(self): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) redirect_handler = YoutubeDLRedirectHandler() - data_handler = compat_urllib_request_DataHandler() + data_handler = urllib.request.DataHandler() # When passing our own FileHandler instance, build_opener won't add the # default FileHandler and allows us to disable the file protocol, which diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 6128ff524..5bac87c10 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -81,10 +81,6 @@ def compat_realpath(path): compat_realpath = os.path.realpath -def compat_print(s): - assert isinstance(s, compat_str) - print(s) - try: compat_Pattern = re.Pattern except AttributeError: @@ -173,62 +169,65 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho # Deprecated -compat_basestring = str +compat_b64decode = base64.b64decode compat_chr = chr +compat_cookiejar = http.cookiejar +compat_cookiejar_Cookie = http.cookiejar.Cookie +compat_cookies_SimpleCookie = http.cookies.SimpleCookie +compat_get_terminal_size = shutil.get_terminal_size +compat_getenv = os.getenv +compat_getpass = getpass.getpass +compat_html_entities = html.entities +compat_html_entities_html5 = html.entities.html5 +compat_HTMLParser = html.parser.HTMLParser +compat_http_client = http.client +compat_http_server = http.server +compat_HTTPError = urllib.error.HTTPError +compat_itertools_count = itertools.count +compat_parse_qs = urllib.parse.parse_qs +compat_str = str +compat_struct_pack = struct.pack +compat_struct_unpack = struct.unpack +compat_tokenize_tokenize = tokenize.tokenize +compat_urllib_error = urllib.error +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse +compat_urllib_request = urllib.request +compat_urlparse = compat_urllib_parse = urllib.parse + + +# To be removed + +compat_basestring = str +compat_collections_abc = collections.abc +compat_cookies = http.cookies +compat_etree_Element = etree.Element +compat_etree_register_namespace = etree.register_namespace compat_filter = filter compat_input = input compat_integer_types = (int, ) compat_kwargs = lambda kwargs: kwargs compat_map = map compat_numeric_types = (int, float, complex) -compat_str = str +compat_print = print +compat_shlex_split = shlex.split +compat_socket_create_connection = socket.create_connection +compat_Struct = struct.Struct +compat_subprocess_get_DEVNULL = lambda: DEVNULL +compat_urllib_parse_quote = urllib.parse.quote +compat_urllib_parse_quote_plus = urllib.parse.quote_plus +compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes +compat_urllib_parse_urlunparse = urllib.parse.urlunparse +compat_urllib_request_DataHandler = urllib.request.DataHandler +compat_urllib_response = urllib.response +compat_urlretrieve = urllib.request.urlretrieve +compat_xml_parse_error = etree.ParseError compat_xpath = lambda xpath: xpath compat_zip = zip workaround_optparse_bug9161 = lambda: None -compat_collections_abc = collections.abc -compat_HTMLParser = html.parser.HTMLParser -compat_HTTPError = urllib.error.HTTPError -compat_Struct = struct.Struct -compat_b64decode = base64.b64decode -compat_cookiejar = http.cookiejar -compat_cookiejar_Cookie = compat_cookiejar.Cookie -compat_cookies = http.cookies -compat_cookies_SimpleCookie = compat_cookies.SimpleCookie -compat_etree_Element = etree.Element -compat_etree_register_namespace = etree.register_namespace -compat_get_terminal_size = shutil.get_terminal_size -compat_getenv = os.getenv -compat_getpass = getpass.getpass -compat_html_entities = html.entities -compat_html_entities_html5 = compat_html_entities.html5 -compat_http_client = http.client -compat_http_server = http.server -compat_itertools_count = itertools.count -compat_parse_qs = urllib.parse.parse_qs -compat_shlex_split = shlex.split -compat_socket_create_connection = socket.create_connection -compat_struct_pack = struct.pack -compat_struct_unpack = struct.unpack -compat_subprocess_get_DEVNULL = lambda: DEVNULL -compat_tokenize_tokenize = tokenize.tokenize -compat_urllib_error = urllib.error -compat_urllib_parse = urllib.parse -compat_urllib_parse_quote = urllib.parse.quote -compat_urllib_parse_quote_plus = urllib.parse.quote_plus -compat_urllib_parse_unquote = urllib.parse.unquote -compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus -compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes -compat_urllib_parse_urlencode = urllib.parse.urlencode -compat_urllib_parse_urlparse = urllib.parse.urlparse -compat_urllib_parse_urlunparse = urllib.parse.urlunparse -compat_urllib_request = urllib.request -compat_urllib_request_DataHandler = urllib.request.DataHandler -compat_urllib_response = urllib.response -compat_urlparse = urllib.parse -compat_urlretrieve = urllib.request.urlretrieve -compat_xml_parse_error = etree.ParseError - # Set public objects diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 4d5618c83..2ba36085e 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -3,25 +3,25 @@ import time import binascii import io +import struct from .fragment import FragmentFD from ..compat import ( - compat_Struct, compat_urllib_error, ) -u8 = compat_Struct('>B') -u88 = compat_Struct('>Bx') -u16 = compat_Struct('>H') -u1616 = compat_Struct('>Hxx') -u32 = compat_Struct('>I') -u64 = compat_Struct('>Q') +u8 = struct.Struct('>B') +u88 = struct.Struct('>Bx') +u16 = struct.Struct('>H') +u1616 = struct.Struct('>Hxx') +u32 = struct.Struct('>I') +u64 = struct.Struct('>Q') -s88 = compat_Struct('>bx') -s16 = compat_Struct('>h') -s1616 = compat_Struct('>hxx') -s32 = compat_Struct('>i') +s88 = struct.Struct('>bx') +s16 = struct.Struct('>h') +s1616 = struct.Struct('>hxx') +s32 = struct.Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index a839f0c1f..c7db05475 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -5,13 +5,14 @@ import hmac import re import struct +import urllib.response +import uuid from base64 import urlsafe_b64encode from binascii import unhexlify from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..compat import ( - compat_urllib_response, compat_urllib_parse_urlparse, compat_urllib_request, ) @@ -19,7 +20,6 @@ ExtractorError, decode_base, int_or_none, - random_uuidv4, request_to_url, time_seconds, update_url_query, @@ -141,7 +141,7 @@ def abematv_license_open(self, url): url = request_to_url(url) ticket = compat_urllib_parse_urlparse(url).netloc response_data = self._get_videokey_from_ticket(ticket) - return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={ + return urllib.response.addinfourl(io.BytesIO(response_data), headers={ 'Content-Length': len(response_data), }, url=url, code=200) @@ -253,7 +253,7 @@ def _get_device_token(self): if self._USERTOKEN: return self._USERTOKEN - self._DEVICE_ID = random_uuidv4() + self._DEVICE_ID = str(uuid.uuid4()) aks = self._generate_aks(self._DEVICE_ID) user_data = self._download_json( 'https://api.abema.io/v1/users', None, note='Authorizing', diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 5d98301b8..1292484c6 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -8,7 +8,6 @@ from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_urlparse, compat_getpass ) @@ -1365,7 +1364,7 @@ def _download_webpage_handle(self, *args, **kwargs): headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers return super(AdobePassIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 28946e9dd..44bfb8bc2 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor -from ..compat import compat_xpath from ..utils import ( ExtractorError, OnDemandPagedList, @@ -282,7 +281,7 @@ def _real_extract(self, url): else: raise ExtractorError('Unable to download video info') - video_element = video_xml.findall(compat_xpath('./track/video'))[-1] + video_element = video_xml.findall('./track/video')[-1] if video_element is None or video_element.text is None: raise ExtractorError( 'Video %s does not exist' % video_id, expected=True) @@ -312,7 +311,7 @@ def _real_extract(self, url): if not video_url: entries = [] - file_elements = video_element.findall(compat_xpath('./file')) + file_elements = video_element.findall('./file') one = len(file_elements) == 1 for file_num, file_element in enumerate(file_elements, start=1): file_url = url_or_none(file_element.text) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 29ad7ded7..5bc8d3110 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import xml.etree.ElementTree import functools import itertools import json @@ -8,7 +9,6 @@ from .common import InfoExtractor from ..compat import ( - compat_etree_Element, compat_HTTPError, compat_str, compat_urllib_error, @@ -318,7 +318,7 @@ def _get_subtitles(self, media, programme_id): continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, compat_etree_Element): + if not isinstance(captions, xml.etree.ElementTree.Element): continue subtitles['en'] = [ { diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index dcd332b43..60c853898 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -4,6 +4,7 @@ import base64 import re import struct +import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor @@ -12,7 +13,6 @@ compat_HTTPError, compat_parse_qs, compat_urlparse, - compat_xml_parse_error, ) from ..utils import ( clean_html, @@ -166,7 +166,7 @@ def _build_brightcove_url(cls, object_str): try: object_doc = compat_etree_fromstring(object_str.encode('utf-8')) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: return fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 48f302f86..8da21a3dc 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3,6 +3,7 @@ import base64 import collections +import xml.etree.ElementTree import hashlib import itertools import json @@ -17,7 +18,6 @@ from ..compat import ( compat_cookiejar_Cookie, compat_cookies_SimpleCookie, - compat_etree_Element, compat_etree_fromstring, compat_expanduser, compat_getpass, @@ -30,7 +30,6 @@ compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, - compat_xml_parse_error, ) from ..downloader import FileDownloader from ..downloader.f4m import ( @@ -951,7 +950,7 @@ def _download_xml_handle( fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an compat_etree_Element, URL handle). + Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -972,7 +971,7 @@ def _download_xml( transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an compat_etree_Element. + Return the xml as an xml.etree.ElementTree.Element. See _download_webpage docstring for arguments specification. """ @@ -988,7 +987,7 @@ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): xml_string = transform_source(xml_string) try: return compat_etree_fromstring(xml_string.encode('utf-8')) - except compat_xml_parse_error as ve: + except xml.etree.ElementTree.ParseError as ve: errmsg = '%s: Failed to parse XML ' % video_id if fatal: raise ExtractorError(errmsg, cause=ve) @@ -2008,7 +2007,7 @@ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality= def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): - if not isinstance(manifest, compat_etree_Element) and not fatal: + if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: return [] # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 7edb645f8..d7696bbd9 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -6,13 +6,13 @@ import json import zlib +import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor from .vrv import VRVBaseIE from ..compat import ( compat_b64decode, - compat_etree_Element, compat_etree_fromstring, compat_str, compat_urllib_parse_urlencode, @@ -395,7 +395,7 @@ def _get_subtitles(self, video_id, webpage): 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, compat_etree_Element): + if not isinstance(sub_doc, xml.etree.ElementTree.Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -525,7 +525,7 @@ def _real_extract(self, url): 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, compat_etree_Element): + if isinstance(streamdata, xml.etree.ElementTree.Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -536,7 +536,7 @@ def _real_extract(self, url): 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, compat_etree_Element): + if isinstance(stream_info, xml.etree.ElementTree.Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -611,7 +611,7 @@ def _real_extract(self, url): season = episode = episode_number = duration = None - if isinstance(metadata, compat_etree_Element): + if isinstance(metadata, xml.etree.ElementTree.Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index f11fc844d..fd620217e 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -4,6 +4,7 @@ import os import re +import xml.etree.ElementTree from .common import InfoExtractor from .youtube import YoutubeIE @@ -12,7 +13,6 @@ compat_str, compat_urllib_parse_unquote, compat_urlparse, - compat_xml_parse_error, ) from ..utils import ( determine_ext, @@ -2827,7 +2827,7 @@ def _real_extract(self, url): try: try: doc = compat_etree_fromstring(webpage) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': self.report_detected('RSS feed') @@ -2862,7 +2862,7 @@ def _real_extract(self, url): self.report_detected('F4M manifest') self._sort_formats(info_dict['formats']) return info_dict - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: pass # Is it a Camtasia project? diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py index 46abd2a6d..9255a7964 100644 --- a/yt_dlp/extractor/microsoftvirtualacademy.py +++ b/yt_dlp/extractor/microsoftvirtualacademy.py @@ -3,9 +3,6 @@ import re from .common import InfoExtractor -from ..compat import ( - compat_xpath, -) from ..utils import ( int_or_none, parse_duration, @@ -70,9 +67,9 @@ def _real_extract(self, url): formats = [] - for sources in settings.findall(compat_xpath('.//MediaSources')): + for sources in settings.findall('.//MediaSources'): sources_type = sources.get('videoType') - for source in sources.findall(compat_xpath('./MediaSource')): + for source in sources.findall('./MediaSource'): video_url = source.text if not video_url or not video_url.startswith('http'): continue @@ -101,7 +98,7 @@ def _real_extract(self, url): self._sort_formats(formats) subtitles = {} - for source in settings.findall(compat_xpath('.//MarkerResourceSource')): + for source in settings.findall('.//MarkerResourceSource'): subtitle_url = source.text if not subtitle_url: continue diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index 5f2df29c6..4de8e9ef4 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -3,6 +3,7 @@ import functools import json +import uuid from .common import InfoExtractor from ..utils import ( @@ -11,7 +12,6 @@ ExtractorError, float_or_none, OnDemandPagedList, - random_uuidv4, traverse_obj, ) @@ -21,7 +21,7 @@ class MildomBaseIE(InfoExtractor): def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): if not self._GUEST_ID: - self._GUEST_ID = f'pc-gp-{random_uuidv4()}' + self._GUEST_ID = f'pc-gp-{str(uuid.uuid4())}' content = self._download_json( url, video_id, note=note, data=json.dumps(body).encode() if body else None, diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index c2dd078ac..b19e59b1a 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -9,7 +9,6 @@ compat_ord, compat_str, compat_urllib_parse_unquote, - compat_zip ) from ..utils import ( ExtractorError, @@ -76,7 +75,7 @@ def _decrypt_xor_cipher(key, ciphertext): """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.""" return ''.join([ compat_chr(compat_ord(ch) ^ compat_ord(k)) - for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) + for ch, k in zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): username, slug = self._match_valid_url(url).groups() diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index be5de0a70..cff314e27 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -6,7 +6,6 @@ from .common import InfoExtractor from ..compat import ( compat_str, - compat_xpath, ) from ..utils import ( ExtractorError, @@ -167,9 +166,9 @@ def _get_video_info(self, itemdoc, use_hls=True): itemdoc, './/{http://search.yahoo.com/mrss/}category', 'scheme', 'urn:mtvn:video_title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title')) + title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//title')) + title_el = itemdoc.find('.//title') if title_el.text is None: title_el = None diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index ccafd7723..bdc2efcd7 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -4,7 +4,6 @@ from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote, - compat_xpath, ) from ..utils import ( int_or_none, @@ -50,7 +49,7 @@ def _real_extract(self, url): duration = int_or_none(xpath_text( doc, './/article/movie/file/duration')) formats = [] - for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')): + for qnode in doc.findall('.//article/movie/file/qualities/qual'): http_url_ele = find_xpath_attr( qnode, './html_urls/video_url', 'format', 'video/mp4') http_url = http_url_ele.text if http_url_ele is not None else None diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index fe4740aae..c19d04900 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -8,7 +8,6 @@ from ..compat import ( compat_urlparse, - compat_kwargs, ) from ..utils import ( check_executable, @@ -158,7 +157,7 @@ def _load_cookies(self): cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**compat_kwargs(cookie)) + self.extractor._set_cookie(**cookie) def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index bbc79c2be..749e6dda3 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -12,7 +12,6 @@ ) from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, ) from ..utils import ( @@ -96,7 +95,7 @@ def _download_json(self, *args, **kwargs): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: - return super()._download_json(*args, **compat_kwargs(kwargs)) + return super()._download_json(*args, **kwargs) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): self._store_client_id(None) diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 235f89713..77485247f 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, compat_urllib_request, compat_urlparse, @@ -132,7 +131,7 @@ def _download_webpage_handle(self, *args, **kwargs): headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers ret = super(UdemyIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) if not ret: return ret webpage, _ = ret diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 972fb480b..a00b387f3 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -8,7 +8,6 @@ from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -109,7 +108,7 @@ def _extract_xsrft_and_vuid(self, webpage): def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): vimeo_config = self._search_regex( r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', - webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + webpage, 'vimeo config', *args, **kwargs) if vimeo_config: return self._parse_json(vimeo_config, video_id) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 67db6d067..8839b44d4 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -3,14 +3,13 @@ import os.path import optparse import re +import shlex import sys from .compat import ( compat_expanduser, compat_get_terminal_size, compat_getenv, - compat_kwargs, - compat_shlex_split, ) from .utils import ( Config, @@ -223,14 +222,12 @@ def _dict_from_options_callback( fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) fmt.format_option_strings = _format_option_string - kw = { - 'version': __version__, - 'formatter': fmt, - 'usage': '%prog [OPTIONS] URL [URL...]', - 'conflict_handler': 'resolve', - } - - parser = _YoutubeDLOptionParser(**compat_kwargs(kw)) + parser = _YoutubeDLOptionParser( + version=__version__, + formatter=fmt, + usage='%prog [OPTIONS] URL [URL...]', + conflict_handler='resolve' + ) general = optparse.OptionGroup(parser, 'General Options') general.add_option( @@ -833,7 +830,7 @@ def _dict_from_options_callback( callback_kwargs={ 'allowed_keys': r'ffmpeg_[io]\d*|%s' % '|'.join(map(re.escape, list_external_downloaders())), 'default_key': 'default', - 'process': compat_shlex_split + 'process': shlex.split }, help=( 'Give these arguments to the external downloader. ' 'Specify the downloader name and the arguments separated by a colon ":". ' @@ -1339,7 +1336,7 @@ def _dict_from_options_callback( callback_kwargs={ 'allowed_keys': r'\w+(?:\+\w+)?', 'default_key': 'default-compat', - 'process': compat_shlex_split, + 'process': shlex.split, 'multiple_keys': False }, help=( 'Give these arguments to the postprocessors. ' diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 86149aeef..59cf0e0c3 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -1,9 +1,9 @@ from __future__ import unicode_literals import os +import shlex import subprocess from .common import PostProcessor -from ..compat import compat_shlex_split from ..utils import ( check_executable, cli_option, @@ -79,7 +79,7 @@ def run(self, information): if not self.cutout: cmd += ['-chapter'] cmd += cli_option(self._downloader.params, '-proxy', 'proxy') - cmd += compat_shlex_split(self.args) # For backward compatibility + cmd += shlex.split(self.args) # For backward compatibility cmd += self._configuration_args(self._exe_name, use_compat=False) cmd += ['--', information['id'], filename, temp_filename] cmd = [encodeArgument(i) for i in cmd] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 324b54e78..3f70b1f60 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -41,12 +41,13 @@ import xml.etree.ElementTree import zlib import mimetypes +import urllib.parse +import shlex from .compat import ( compat_HTMLParseError, compat_HTMLParser, compat_HTTPError, - compat_basestring, compat_brotli, compat_chr, compat_cookiejar, @@ -55,28 +56,19 @@ compat_html_entities, compat_html_entities_html5, compat_http_client, - compat_integer_types, - compat_numeric_types, - compat_kwargs, compat_os_name, compat_parse_qs, - compat_shlex_split, compat_shlex_quote, compat_str, compat_struct_pack, compat_struct_unpack, compat_urllib_error, - compat_urllib_parse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urllib_parse_urlunparse, - compat_urllib_parse_quote, - compat_urllib_parse_quote_plus, compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, compat_websockets, - compat_xpath, ) from .socks import ( @@ -340,7 +332,7 @@ def xpath_with_ns(path, ns_map): def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): def _find_xpath(xpath): - return node.find(compat_xpath(xpath)) + return node.find(xpath) if isinstance(xpath, (str, compat_str)): n = _find_xpath(xpath) @@ -1193,7 +1185,7 @@ class XAttrUnavailableError(YoutubeDLError): def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): - hc = http_class(*args, **compat_kwargs(kwargs)) + hc = http_class(*args, **kwargs) source_address = ydl_handler._params.get('source_address') if source_address is not None: @@ -2401,7 +2393,7 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if isinstance(int_str, compat_integer_types): + if isinstance(int_str, int): return int_str elif isinstance(int_str, compat_str): int_str = re.sub(r'[,\.\+]', '', int_str) @@ -2442,7 +2434,7 @@ def request_to_url(req): def strftime_or_none(timestamp, date_format, default=None): datetime_object = None try: - if isinstance(timestamp, compat_numeric_types): # unix timestamp + if isinstance(timestamp, (int, float)): # unix timestamp datetime_object = datetime.datetime.utcfromtimestamp(timestamp) elif isinstance(timestamp, compat_str): # assume YYYYMMDD datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d') @@ -2452,7 +2444,7 @@ def strftime_or_none(timestamp, date_format, default=None): def parse_duration(s): - if not isinstance(s, compat_basestring): + if not isinstance(s, str): return None s = s.strip() if not s: @@ -2789,7 +2781,7 @@ def lowercase_escape(s): def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" - return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") + return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") def escape_url(url): @@ -2975,7 +2967,7 @@ def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): def parse_age_limit(s): if type(s) == int: return s if 0 <= s <= 21 else None - if not isinstance(s, compat_basestring): + if not isinstance(s, str): return None m = re.match(r'^(?P\d{1,2})\+?$', s) if m: @@ -3405,7 +3397,7 @@ def _match_one(filter_part, dct, incomplete): comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote']) actual_value = dct.get(m['key']) numeric_comparison = None - if isinstance(actual_value, compat_numeric_types): + if isinstance(actual_value, (int, float)): # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see @@ -4859,9 +4851,9 @@ def iri_to_uri(iri): net_location = '' if iri_parts.username: - net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~") + net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~") if iri_parts.password is not None: - net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~") + net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~") net_location += '@' net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames. @@ -4869,19 +4861,19 @@ def iri_to_uri(iri): if iri_parts.port is not None and iri_parts.port != 80: net_location += ':' + str(iri_parts.port) - return compat_urllib_parse_urlunparse( + return urllib.parse.urlunparse( (iri_parts.scheme, net_location, - compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), + urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), # Unsure about the `safe` argument, since this is a legacy way of handling parameters. - compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), + urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component. - compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), + urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), - compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) + urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes. @@ -5233,7 +5225,7 @@ def read_file(filename, default=[]): try: # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 contents = optionf.read() - res = compat_shlex_split(contents, comments=True) + res = shlex.split(contents, comments=True) finally: optionf.close() return res diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index 962aa57ad..c78078f17 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -15,7 +15,6 @@ import io from .utils import int_or_none, timetuple_from_msec from .compat import ( - compat_str as str, compat_Pattern, compat_Match, )