From ff9b0e071ffae5543cc309e6f9e647ac51e5846e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 14 Jun 2023 19:08:46 +0530 Subject: [PATCH] [extractor/youtube] Determine audio language using automatic captions --- yt_dlp/extractor/youtube.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 47ad1da76..606f24d04 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4312,9 +4312,13 @@ def process_language(container, base_url, lang_code, sub_name, query): continue trans_code += f'-{lang_code}' trans_name += format_field(lang_name, None, ' from %s') - # Add an "-orig" label to the original language so that it can be distinguished. - # The subs are returned without "-orig" as well for compatibility if lang_code == f'a-{orig_trans_code}': + # Set audio language based on original subtitles + for f in formats: + if f.get('acodec') != 'none' and not f.get('language'): + f['language'] = orig_trans_code + # Add an "-orig" label to the original language so that it can be distinguished. + # The subs are returned without "-orig" as well for compatibility process_language( automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {}) # Setting tlang=lang returns damaged subtitles.