mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-18 20:27:52 +00:00
Determine merge container better (See desc) (#1482)
* Determine the container early. Closes #4069
* Use codecs instead of just file extensions
* Obey `--prefer-free-formats`
* Allow fallbacks in `--merge-output-format`

Authored by: pukkandan, selfisekai
This commit is contained in:
parent
fe0918bb65
commit
fc61aff41b
@ -858,10 +858,10 @@ ## Video Format Options:
|
||||
downloadable
|
||||
-F, --list-formats List available formats of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
--merge-output-format FORMAT Container to use when merging formats (e.g.
|
||||
bestvideo+bestaudio). Ignored if no merge is
|
||||
required. (currently supported: avi, flv,
|
||||
mkv, mov, mp4, webm)
|
||||
--merge-output-format FORMAT Containers that may be used when merging
|
||||
formats, separated by "/" (Eg: "mp4/mkv").
|
||||
Ignored if no merge is required. (currently
|
||||
supported: avi, flv, mkv, mov, mp4, webm)
|
||||
|
||||
## Subtitle Options:
|
||||
--write-subs Write subtitle file
|
||||
|
@ -53,6 +53,7 @@
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
format_bytes,
|
||||
get_compatible_ext,
|
||||
get_element_by_attribute,
|
||||
get_element_by_class,
|
||||
get_element_html_by_attribute,
|
||||
@ -1843,6 +1844,31 @@ def test_determine_file_encoding(self):
|
||||
self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
|
||||
self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
|
||||
|
||||
def test_get_compatible_ext(self):
    """Check the container chosen by get_compatible_ext for representative stream mixes."""
    cases = (
        # More than one stream of a kind
        (dict(vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv'),

        # Codecs unknown: the decision is made from the extensions
        (dict(vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm'),

        # Codecs known: they take precedence over the extensions
        (dict(vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4'),
        (dict(vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm'),

        # Explicit preferences
        (dict(vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'],
              preferences=['flv', 'mp4']), 'mp4'),
        (dict(vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'],
              preferences=('webm', 'mkv')), 'mkv'),
    )
    for kwargs, expected in cases:
        self.assertEqual(get_compatible_ext(**kwargs), expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -102,6 +102,7 @@
|
||||
format_decimal_suffix,
|
||||
format_field,
|
||||
formatSeconds,
|
||||
get_compatible_ext,
|
||||
get_domain,
|
||||
int_or_none,
|
||||
iri_to_uri,
|
||||
@ -134,6 +135,7 @@
|
||||
timetuple_from_msec,
|
||||
to_high_limit_path,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
url_basename,
|
||||
variadic,
|
||||
@ -372,7 +374,7 @@ class YoutubeDL:
|
||||
|
||||
Progress hooks are guaranteed to be called at least twice
|
||||
(with status "started" and "finished") if the processing is successful.
|
||||
merge_output_format: Extension to use when merging formats.
|
||||
merge_output_format: "/" separated list of extensions to use when merging formats.
|
||||
final_ext: Expected final extension; used to detect when the file was
|
||||
already downloaded and converted
|
||||
fixup: Automatically correct known faults of the file.
|
||||
@ -2088,14 +2090,13 @@ def _merge(formats_pair):
|
||||
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
|
||||
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
|
||||
|
||||
output_ext = self.params.get('merge_output_format')
|
||||
if not output_ext:
|
||||
if the_only_video:
|
||||
output_ext = the_only_video['ext']
|
||||
elif the_only_audio and not video_fmts:
|
||||
output_ext = the_only_audio['ext']
|
||||
else:
|
||||
output_ext = 'mkv'
|
||||
output_ext = get_compatible_ext(
|
||||
vcodecs=[f.get('vcodec') for f in video_fmts],
|
||||
acodecs=[f.get('acodec') for f in audio_fmts],
|
||||
vexts=[f['ext'] for f in video_fmts],
|
||||
aexts=[f['ext'] for f in audio_fmts],
|
||||
preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
|
||||
or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
|
||||
|
||||
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
|
||||
|
||||
@ -3067,33 +3068,9 @@ def existing_video_file(*filepaths):
|
||||
return
|
||||
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
|
||||
def compatible_formats(formats):
    """Tell whether the given formats can share one container by extension.

    Returns False when more than two formats carry video or more than two
    carry audio; otherwise returns True only if every extension in use
    belongs to a single family of mutually compatible extensions.
    """
    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
    video_count = sum(1 for fmt in formats if fmt.get('vcodec') != 'none')
    audio_count = sum(1 for fmt in formats if fmt.get('acodec') != 'none')
    if video_count > 2 or audio_count > 2:
        return False

    # Check extension
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
        {'webm'},
    )
    used_exts = {fmt.get('ext') for fmt in formats}
    # TODO: Check acodec/vcodec
    return any(family.issuperset(used_exts) for family in COMPATIBLE_EXTS)
|
||||
|
||||
requested_formats = info_dict['requested_formats']
|
||||
old_ext = info_dict['ext']
|
||||
if self.params.get('merge_output_format') is None:
|
||||
if not compatible_formats(requested_formats):
|
||||
info_dict['ext'] = 'mkv'
|
||||
self.report_warning(
|
||||
'Requested formats are incompatible for merge and will be merged into mkv')
|
||||
if (info_dict['ext'] == 'webm'
|
||||
and info_dict.get('thumbnails')
|
||||
# check with type instead of pp_key, __name__, or isinstance
|
||||
|
@ -228,7 +228,8 @@ def validate_minmax(min_val, max_val, min_name, max_name=None):
|
||||
validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
|
||||
|
||||
# Postprocessor formats
|
||||
validate_in('merge output format', opts.merge_output_format, FFmpegMergerPP.SUPPORTED_EXTS)
|
||||
validate_regex('merge output format', opts.merge_output_format,
|
||||
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
|
||||
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
|
||||
validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
|
||||
validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
|
||||
|
@ -782,7 +782,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
||||
'--merge-output-format',
|
||||
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
|
||||
help=(
|
||||
'Container to use when merging formats (e.g. bestvideo+bestaudio). Ignored if no merge is required. '
|
||||
'Containers that may be used when merging formats, separated by "/" (Eg: "mp4/mkv"). '
|
||||
'Ignored if no merge is required. '
|
||||
f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
|
||||
video_format.add_option(
|
||||
'--allow-unplayable-formats',
|
||||
|
@ -3456,6 +3456,46 @@ def parse_codecs(codecs_str):
|
||||
return {}
|
||||
|
||||
|
||||
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose the container extension to use when merging the given streams.

    vcodecs/acodecs are the codec strings of the video/audio streams (entries
    may be None when unknown) and vexts/aexts are the matching file extensions;
    each codec list must be as long as its extension list. Either pair may be
    empty, e.g. for an audio-only merge.

    preferences, if truthy, is an ordered sequence of acceptable extensions;
    the first one compatible with the streams is returned. If none of them is
    compatible, 'mkv' is returned when it is among the preferences, otherwise
    the last preference. Without preferences, a container is picked from the
    codecs, then from the extensions, falling back to 'mkv'.
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: Not all codecs supported by parse_codecs are handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
            'h264', 'aacl',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def sanitize_codec(codecs):
        # Only the first stream of each kind is inspected. The indexing happens
        # inside the guarded block so that an empty list (no stream of that
        # kind) or an unknown codec (None) yields None instead of raising.
        try:
            # 'av01.0.12M.08' -> 'av1', 'AVC1.64001F' -> 'avc1'
            return codecs[0].split('.')[0].replace('0', '').lower()
        except (AttributeError, IndexError, TypeError):
            return None

    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    # First pass: decide from the codecs
    for ext in preferences or COMPATIBLE_CODECS:
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # Second pass: decide from the file extensions
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
|
||||
|
||||
|
||||
def urlhandle_detect_ext(url_handle):
|
||||
getheader = url_handle.headers.get
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user