From ff91cf748343c41a74b09120896feccd390f91ce Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 9 Mar 2022 02:24:41 +0530 Subject: [PATCH] [utils] Add `get_first` --- yt_dlp/extractor/facebook.py | 9 ++++----- yt_dlp/extractor/tiktok.py | 4 ++-- yt_dlp/extractor/youtube.py | 5 +---- yt_dlp/utils.py | 4 ++++ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index d39dcc058..ef57b221c 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -18,6 +18,7 @@ ExtractorError, float_or_none, get_element_by_id, + get_first, int_or_none, js_to_json, merge_dicts, @@ -405,11 +406,9 @@ def extract_metadata(webpage): ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or [] if str(m.get('id')) == video_id and m.get('__typename') == 'Video'] - title = traverse_obj(media, (..., 'title', 'text'), get_all=False) - description = traverse_obj(media, ( - ..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False) - uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False) - or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {}) + title = get_first(media, ('title', 'text')) + description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) + uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} page_title = title or self._html_search_regex(( r']*class="uiHeaderTitle"[^>]*>(?P[^<]*)', diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 620973a9f..56cc2dcc6 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -15,6 +15,7 @@ from ..utils import ( ExtractorError, HEADRequest, + get_first, int_or_none, join_nonempty, LazyList, @@ -816,8 +817,7 @@ def _real_extract(self, url): render_data = self._parse_json( render_data_json, video_id, transform_source=compat_urllib_parse_unquote) - return self._parse_aweme_video_web( - traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url) + return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url) class TikTokVMIE(InfoExtractor): diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index da49df8cd..66bb8d9f0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -39,6 +39,7 @@ ExtractorError, float_or_none, format_field, + get_first, int_or_none, is_html, join_nonempty, @@ -72,10 +73,6 @@ ) -def get_first(obj, keys, **kwargs): - return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) - - # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 10a9a72ff..9b130e109 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5218,6 +5218,10 @@ def traverse_dict(dictn, keys, casesense=True): return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True) +def get_first(obj, keys, **kwargs): + return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) + + def variadic(x, allowed_types=(str, bytes, dict)): return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)