From d37422f1db3cbdf85638eea42e73883ab1c9df10 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Fri, 6 Jan 2023 15:22:25 +0900 Subject: [PATCH] [extractor/biliIntl] Add fallback to `video_data` (#5971) Authored by: HobbyistDev --- yt_dlp/extractor/bilibili.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index dbe212b38..d4b05248f 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -16,6 +16,7 @@ format_field, int_or_none, make_archive_id, + merge_dicts, mimetype2ext, parse_count, parse_qs, @@ -934,6 +935,10 @@ class BiliIntlIE(BiliIntlBaseIE): 'title': 'E2 - The First Night', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 2, + 'upload_date': '20201009', + 'episode': 'Episode 2', + 'timestamp': 1602259500, + 'description': 'md5:297b5a17155eb645e14a14b385ab547e', } }, { # Non-Bstation page @@ -944,6 +949,10 @@ class BiliIntlIE(BiliIntlBaseIE): 'title': 'E3 - Who?', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 3, + 'description': 'md5:e1a775e71a35c43f141484715470ad09', + 'episode': 'Episode 3', + 'upload_date': '20211219', + 'timestamp': 1639928700, } }, { # Subtitle with empty content @@ -956,6 +965,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'episode_number': 140, }, 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' + }, { + 'url': 'https://www.bilibili.tv/en/video/2041863208', + 'info_dict': { + 'id': '2041863208', + 'ext': 'mp4', + 'timestamp': 1670874843, + 'description': 'Scheduled for April 2023.\nStudio: ufotable', + 'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$', + 'upload_date': '20221212', + 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation', + } }, { 'url': 'https://www.biliintl.com/en/play/34613/341736', 'only_matching': True, @@ -989,7 +1009,7 @@ def _extract_video_metadata(self, url, video_id, season_id): self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={}) or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None)) video_data = traverse_obj( - initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) + initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {} if season_id and not video_data: # Non-Bstation layout, read through episode list @@ -998,7 +1018,12 @@ def _extract_video_metadata(self, url, video_id, season_id): 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id ), expected_type=dict, get_all=False) - return self._parse_video_metadata(video_data) + # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found + return merge_dicts( + self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), { + 'title': self._html_search_meta('og:title', webpage), + 'description': self._html_search_meta('og:description', webpage) + }) def _real_extract(self, url): season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')