From d7009caa03b48360541e0770a9231ba71f429308 Mon Sep 17 00:00:00 2001 From: shirt-dev <2660574+shirt-dev@users.noreply.github.com> Date: Wed, 10 Mar 2021 09:39:40 -0500 Subject: [PATCH] Improve HLS/DASH external downloader code (#162) Authored by: shirt --- yt_dlp/YoutubeDL.py | 4 +--- yt_dlp/downloader/dash.py | 8 +++++--- yt_dlp/downloader/external.py | 23 ++++++++++------------- yt_dlp/downloader/hls.py | 31 ++++++++++++++++++------------- 4 files changed, 34 insertions(+), 32 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e58f7a32f..5e3c015ba 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2437,9 +2437,7 @@ def correct_ext(filename): else: assert fixup_policy in ('ignore', 'never') - if (info_dict.get('protocol') == 'm3u8_native' - or info_dict.get('protocol') == 'm3u8' - and self.params.get('hls_prefer_native')): + if get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD': if fixup_policy == 'warn': self.report_warning('%s: malformed AAC bitstream detected.' % ( info_dict['id'])) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index d758282c1..99acc8db2 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -37,7 +37,7 @@ def real_download(self, filename, info_dict): fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - fragment_urls = [] + fragments = [] frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 @@ -49,7 +49,9 @@ def real_download(self, filename, info_dict): fragment_url = urljoin(fragment_base_url, fragment['path']) if real_downloader: - fragment_urls.append(fragment_url) + fragments.append({ + 'url': fragment_url, + }) continue # In DASH, the first segment contains necessary headers to @@ -90,7 +92,7 @@ def real_download(self, filename, info_dict): if real_downloader: info_copy = info_dict.copy() - info_copy['url_list'] = fragment_urls + info_copy['fragments'] = fragments fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 440603ea3..026a4e382 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -122,18 +122,14 @@ def _call_downloader(self, tmpfilename, info_dict): if p.returncode != 0: self.to_stderr(stderr.decode('utf-8', 'replace')) - if 'url_list' in info_dict: + if 'fragments' in info_dict: file_list = [] - for [i, url] in enumerate(info_dict['url_list']): - tmpsegmentname = '%s_%s.frag' % (tmpfilename, i) - file_list.append(tmpsegmentname) - key_list = info_dict.get('key_list') - decrypt_info = None dest, _ = sanitize_open(tmpfilename, 'wb') - for i, file in enumerate(file_list): + for [i, fragment] in enumerate(info_dict['fragments']): + file = '%s_%s.frag' % (tmpfilename, i) + decrypt_info = fragment.get('decrypt_info') src, _ = sanitize_open(file, 'rb') - if key_list: - decrypt_info = next((x for x in key_list if x['INDEX'] == i), decrypt_info) + if decrypt_info: if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( @@ -149,6 +145,7 @@ def _call_downloader(self, tmpfilename, info_dict): fragment_data = src.read() dest.write(fragment_data) src.close() + file_list.append(file) dest.close() if not self.params.get('keep_fragments', False): for file_path in file_list: @@ -248,7 +245,7 @@ class Aria2cFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c'] dn = os.path.dirname(tmpfilename) - if 'url_list' not in info_dict: + if 'fragments' not in info_dict: cmd += ['--out', os.path.basename(tmpfilename)] verbose_level_args = ['--console-log-level=warn', '--summary-interval=0'] cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args) @@ -262,14 +259,14 @@ def _make_cmd(self, tmpfilename, info_dict): cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += ['--auto-file-renaming=false'] - if 'url_list' in info_dict: + if 'fragments' in info_dict: cmd += verbose_level_args cmd += ['--uri-selector', 'inorder', '--download-result=hide'] url_list_file = '%s.frag.urls' % tmpfilename url_list = [] - for [i, url] in enumerate(info_dict['url_list']): + for [i, fragment] in enumerate(info_dict['fragments']): tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i) - url_list.append('%s\n\tout=%s' % (url, tmpsegmentname)) + url_list.append('%s\n\tout=%s' % (fragment['url'], tmpsegmentname)) stream, _ = sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode('utf-8')) stream.close() diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 6f30842a7..29be6bdf9 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -29,7 +29,7 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag): + def can_download(manifest, info_dict, allow_unplayable_formats=False, real_downloader=None, with_crypto=can_decrypt_frag): UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -53,6 +53,10 @@ def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypt UNSUPPORTED_FEATURES += [ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] ] + if real_downloader: + UNSUPPORTED_FEATURES += [ + r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + ] check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest check_results.append(with_crypto or not is_aes128_enc) @@ -68,7 +72,9 @@ def real_download(self, filename, info_dict): man_url = urlh.geturl() s = urlh.read().decode('utf-8', 'ignore') - if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')): + real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None) + + if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats'), real_downloader): if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): self.report_error('pycryptodome not found. Please install it.') return False @@ -83,8 +89,6 @@ def real_download(self, filename, info_dict): # fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) - real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None) - def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) @@ -93,7 +97,7 @@ def is_ad_fragment_end(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) - fragment_urls = [] + fragments = [] media_frags = 0 ad_frags = 0 @@ -136,7 +140,6 @@ def is_ad_fragment_end(s): i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} - key_list = [] byte_range = {} discontinuity_count = 0 frag_index = 0 @@ -161,7 +164,10 @@ def is_ad_fragment_end(s): frag_url = update_url_query(frag_url, extra_query) if real_downloader: - fragment_urls.append(frag_url) + fragments.append({ + 'url': frag_url, + 'decrypt_info': decrypt_info, + }) continue download_frag = True @@ -181,7 +187,10 @@ def is_ad_fragment_end(s): if extra_query: frag_url = update_url_query(frag_url, extra_query) if real_downloader: - fragment_urls.append(frag_url) + fragments.append({ + 'url': frag_url, + 'decrypt_info': decrypt_info, + }) continue if map_info.get('BYTERANGE'): @@ -206,9 +215,6 @@ def is_ad_fragment_end(s): decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None - key_data = decrypt_info.copy() - key_data['INDEX'] = frag_index - key_list.append(key_data) elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) @@ -275,8 +281,7 @@ def is_ad_fragment_end(s): if real_downloader: info_copy = info_dict.copy() - info_copy['url_list'] = fragment_urls - info_copy['key_list'] = key_list + info_copy['fragments'] = fragments fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: