[utils] Add get_first

This commit is contained in:
pukkandan 2022-03-09 02:24:41 +05:30
parent a3b7dff015
commit ff91cf7483
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
4 changed files with 11 additions and 11 deletions

View File

@ -18,6 +18,7 @@
ExtractorError,
float_or_none,
get_element_by_id,
get_first,
int_or_none,
js_to_json,
merge_dicts,
@ -405,11 +406,9 @@ def extract_metadata(webpage):
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or []
if str(m.get('id')) == video_id and m.get('__typename') == 'Video']
title = traverse_obj(media, (..., 'title', 'text'), get_all=False)
description = traverse_obj(media, (
..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False)
uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False)
or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {})
title = get_first(media, ('title', 'text'))
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
page_title = title or self._html_search_regex((
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',

View File

@ -15,6 +15,7 @@
from ..utils import (
ExtractorError,
HEADRequest,
get_first,
int_or_none,
join_nonempty,
LazyList,
@ -816,8 +817,7 @@ def _real_extract(self, url):
render_data = self._parse_json(
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
return self._parse_aweme_video_web(
traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url)
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url)
class TikTokVMIE(InfoExtractor):

View File

@ -39,6 +39,7 @@
ExtractorError,
float_or_none,
format_field,
get_first,
int_or_none,
is_html,
join_nonempty,
@ -72,10 +73,6 @@
)
def get_first(obj, keys, **kwargs):
    """Look up `keys` under every branch of `obj` via traverse_obj with get_all=False.

    `keys` may be a single key or an iterable of keys; it is normalised with
    variadic() and prefixed with `...` to form the traversal path. Any extra
    keyword arguments are passed through to traverse_obj unchanged.

    NOTE: get_all is always forced to False, so supplying it in kwargs raises
    TypeError (duplicate keyword argument). Presumably this yields only the
    first match, as the name suggests - confirm against traverse_obj.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {

View File

@ -5218,6 +5218,10 @@ def traverse_dict(dictn, keys, casesense=True):
return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
def get_first(obj, keys, **kwargs):
    """Look up `keys` under every branch of `obj` via traverse_obj with get_all=False.

    `keys` may be a single key or an iterable of keys; it is normalised with
    variadic() and prefixed with `...` to form the traversal path. Any extra
    keyword arguments are passed through to traverse_obj unchanged.

    NOTE: get_all is always forced to False, so supplying it in kwargs raises
    TypeError (duplicate keyword argument). Presumably this yields only the
    first match, as the name suggests - confirm against traverse_obj.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
def variadic(x, allowed_types=(str, bytes, dict)):
    """Return `x` itself if it is an iterable, otherwise wrap it in a 1-tuple.

    Instances of `allowed_types` (by default str, bytes and dict) are treated
    as single values: they are wrapped even though they are iterable.
    """
    is_iterable = isinstance(x, collections.abc.Iterable)
    # The allowed_types check only runs for iterables, mirroring short-circuit `and`
    if is_iterable and not isinstance(x, allowed_types):
        return x
    return (x,)