Improve HLS/DASH external downloader code (#162)

Authored by: shirt
This commit is contained in:
shirt-dev 2021-03-10 09:39:40 -05:00 committed by GitHub
parent 54759df586
commit d7009caa03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 32 deletions

View File

@ -2437,9 +2437,7 @@ def correct_ext(filename):
else: else:
assert fixup_policy in ('ignore', 'never') assert fixup_policy in ('ignore', 'never')
if (info_dict.get('protocol') == 'm3u8_native' if get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD':
or info_dict.get('protocol') == 'm3u8'
and self.params.get('hls_prefer_native')):
if fixup_policy == 'warn': if fixup_policy == 'warn':
self.report_warning('%s: malformed AAC bitstream detected.' % ( self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id'])) info_dict['id']))

View File

@ -37,7 +37,7 @@ def real_download(self, filename, info_dict):
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
fragment_urls = [] fragments = []
frag_index = 0 frag_index = 0
for i, fragment in enumerate(fragments): for i, fragment in enumerate(fragments):
frag_index += 1 frag_index += 1
@ -49,7 +49,9 @@ def real_download(self, filename, info_dict):
fragment_url = urljoin(fragment_base_url, fragment['path']) fragment_url = urljoin(fragment_base_url, fragment['path'])
if real_downloader: if real_downloader:
fragment_urls.append(fragment_url) fragments.append({
'url': fragment_url,
})
continue continue
# In DASH, the first segment contains necessary headers to # In DASH, the first segment contains necessary headers to
@ -90,7 +92,7 @@ def real_download(self, filename, info_dict):
if real_downloader: if real_downloader:
info_copy = info_dict.copy() info_copy = info_dict.copy()
info_copy['url_list'] = fragment_urls info_copy['fragments'] = fragments
fd = real_downloader(self.ydl, self.params) fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice # TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks: # for ph in self._progress_hooks:

View File

@ -122,18 +122,14 @@ def _call_downloader(self, tmpfilename, info_dict):
if p.returncode != 0: if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace')) self.to_stderr(stderr.decode('utf-8', 'replace'))
if 'url_list' in info_dict: if 'fragments' in info_dict:
file_list = [] file_list = []
for [i, url] in enumerate(info_dict['url_list']):
tmpsegmentname = '%s_%s.frag' % (tmpfilename, i)
file_list.append(tmpsegmentname)
key_list = info_dict.get('key_list')
decrypt_info = None
dest, _ = sanitize_open(tmpfilename, 'wb') dest, _ = sanitize_open(tmpfilename, 'wb')
for i, file in enumerate(file_list): for [i, fragment] in enumerate(info_dict['fragments']):
file = '%s_%s.frag' % (tmpfilename, i)
decrypt_info = fragment.get('decrypt_info')
src, _ = sanitize_open(file, 'rb') src, _ = sanitize_open(file, 'rb')
if key_list: if decrypt_info:
decrypt_info = next((x for x in key_list if x['INDEX'] == i), decrypt_info)
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') iv = decrypt_info.get('IV')
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
@ -149,6 +145,7 @@ def _call_downloader(self, tmpfilename, info_dict):
fragment_data = src.read() fragment_data = src.read()
dest.write(fragment_data) dest.write(fragment_data)
src.close() src.close()
file_list.append(file)
dest.close() dest.close()
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
for file_path in file_list: for file_path in file_list:
@ -248,7 +245,7 @@ class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c'] cmd = [self.exe, '-c']
dn = os.path.dirname(tmpfilename) dn = os.path.dirname(tmpfilename)
if 'url_list' not in info_dict: if 'fragments' not in info_dict:
cmd += ['--out', os.path.basename(tmpfilename)] cmd += ['--out', os.path.basename(tmpfilename)]
verbose_level_args = ['--console-log-level=warn', '--summary-interval=0'] verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args) cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
@ -262,14 +259,14 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += ['--auto-file-renaming=false'] cmd += ['--auto-file-renaming=false']
if 'url_list' in info_dict: if 'fragments' in info_dict:
cmd += verbose_level_args cmd += verbose_level_args
cmd += ['--uri-selector', 'inorder', '--download-result=hide'] cmd += ['--uri-selector', 'inorder', '--download-result=hide']
url_list_file = '%s.frag.urls' % tmpfilename url_list_file = '%s.frag.urls' % tmpfilename
url_list = [] url_list = []
for [i, url] in enumerate(info_dict['url_list']): for [i, fragment] in enumerate(info_dict['fragments']):
tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i) tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
url_list.append('%s\n\tout=%s' % (url, tmpsegmentname)) url_list.append('%s\n\tout=%s' % (fragment['url'], tmpsegmentname))
stream, _ = sanitize_open(url_list_file, 'wb') stream, _ = sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode('utf-8')) stream.write('\n'.join(url_list).encode('utf-8'))
stream.close() stream.close()

View File

@ -29,7 +29,7 @@ class HlsFD(FragmentFD):
FD_NAME = 'hlsnative' FD_NAME = 'hlsnative'
@staticmethod @staticmethod
def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag): def can_download(manifest, info_dict, allow_unplayable_formats=False, real_downloader=None, with_crypto=can_decrypt_frag):
UNSUPPORTED_FEATURES = [ UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
@ -53,6 +53,10 @@ def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypt
UNSUPPORTED_FEATURES += [ UNSUPPORTED_FEATURES += [
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
] ]
if real_downloader:
UNSUPPORTED_FEATURES += [
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
]
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
check_results.append(with_crypto or not is_aes128_enc) check_results.append(with_crypto or not is_aes128_enc)
@ -68,7 +72,9 @@ def real_download(self, filename, info_dict):
man_url = urlh.geturl() man_url = urlh.geturl()
s = urlh.read().decode('utf-8', 'ignore') s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')): real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats'), real_downloader):
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
self.report_error('pycryptodome not found. Please install it.') self.report_error('pycryptodome not found. Please install it.')
return False return False
@ -83,8 +89,6 @@ def real_download(self, filename, info_dict):
# fd.add_progress_hook(ph) # fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict) return fd.real_download(filename, info_dict)
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
def is_ad_fragment_start(s): def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
@ -93,7 +97,7 @@ def is_ad_fragment_end(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
fragment_urls = [] fragments = []
media_frags = 0 media_frags = 0
ad_frags = 0 ad_frags = 0
@ -136,7 +140,6 @@ def is_ad_fragment_end(s):
i = 0 i = 0
media_sequence = 0 media_sequence = 0
decrypt_info = {'METHOD': 'NONE'} decrypt_info = {'METHOD': 'NONE'}
key_list = []
byte_range = {} byte_range = {}
discontinuity_count = 0 discontinuity_count = 0
frag_index = 0 frag_index = 0
@ -161,7 +164,10 @@ def is_ad_fragment_end(s):
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_query)
if real_downloader: if real_downloader:
fragment_urls.append(frag_url) fragments.append({
'url': frag_url,
'decrypt_info': decrypt_info,
})
continue continue
download_frag = True download_frag = True
@ -181,7 +187,10 @@ def is_ad_fragment_end(s):
if extra_query: if extra_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_query)
if real_downloader: if real_downloader:
fragment_urls.append(frag_url) fragments.append({
'url': frag_url,
'decrypt_info': decrypt_info,
})
continue continue
if map_info.get('BYTERANGE'): if map_info.get('BYTERANGE'):
@ -206,9 +215,6 @@ def is_ad_fragment_end(s):
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
if decrypt_url != decrypt_info['URI']: if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None decrypt_info['KEY'] = None
key_data = decrypt_info.copy()
key_data['INDEX'] = frag_index
key_list.append(key_data)
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:]) media_sequence = int(line[22:])
@ -275,8 +281,7 @@ def is_ad_fragment_end(s):
if real_downloader: if real_downloader:
info_copy = info_dict.copy() info_copy = info_dict.copy()
info_copy['url_list'] = fragment_urls info_copy['fragments'] = fragments
info_copy['key_list'] = key_list
fd = real_downloader(self.ydl, self.params) fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice # TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks: # for ph in self._progress_hooks: