mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-23 09:01:43 +00:00
[ie/NhkRadiru] Extract extended description (#9162)
Authored by: garret1317
This commit is contained in:
parent
43cfd462c0
commit
4392447d94
@ -9,6 +9,7 @@
|
|||||||
join_nonempty,
|
join_nonempty,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
|
|||||||
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||||
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
|
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
|
||||||
'skip': 'Episode expired on 2023-04-16',
|
'skip': 'Episode expired on 2024-02-24',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'channel': 'NHK-FM',
|
'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
|
||||||
'uploader': 'NHK-FM',
|
'id': '0449_01_3926210',
|
||||||
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
|
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'id': '0449_01_3853544',
|
|
||||||
'series': 'ジャズ・トゥナイト',
|
'series': 'ジャズ・トゥナイト',
|
||||||
|
'uploader': 'NHK-FM',
|
||||||
|
'channel': 'NHK-FM',
|
||||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||||
'timestamp': 1680969600,
|
'release_date': '20240217',
|
||||||
'title': 'ジャズ・トゥナイト NEWジャズ特集',
|
'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
|
||||||
'upload_date': '20230408',
|
'timestamp': 1708185600,
|
||||||
'release_timestamp': 1680962400,
|
'release_timestamp': 1708178400,
|
||||||
'release_date': '20230408',
|
'upload_date': '20240217',
|
||||||
'was_live': True,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||||
@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
|
|||||||
'series': 'らじる文庫 by ラジオ深夜便 ',
|
'series': 'らじる文庫 by ラジオ深夜便 ',
|
||||||
'release_timestamp': 1481126700,
|
'release_timestamp': 1481126700,
|
||||||
'upload_date': '20211101',
|
'upload_date': '20211101',
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
|
||||||
}, {
|
}, {
|
||||||
# news
|
# news
|
||||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
|
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
|
||||||
@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
# Class-wide cache of the programme-detail API URL template.
# Stays None until _real_initialize assigns it from the `url_program_detail`
# entry of the site config XML; _extract_extended_description later fills it in
# via str.format(service=..., area=..., dateid=...).
_API_URL_TMPL = None
|
||||||
|
|
||||||
|
def _extract_extended_description(self, episode_id, episode):
    """Build the long-form description of an episode from the detail API.

    `aa_vinfo2` of *episode* is split on its first comma into a service and
    an area; `aa_vinfo3` is passed as the `dateid`. These three values are
    substituted into the class-level `_API_URL_TMPL`, the resulting JSON is
    downloaded non-fatally, and the `subtitle`, `content`, `act` and `music`
    fields of the first entry under `list -> <service>` are joined with
    blank lines.

    Returns None when no detail URL could be built; otherwise whatever
    join_nonempty yields for the fields that were found.
    """
    vinfo2_parts = traverse_obj(
        episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
    service, _, area = vinfo2_parts
    date_id = traverse_obj(episode, ('aa_vinfo3', {str}))

    def build_detail_url():
        # Fails inside try_call (instead of propagating) while the template is still None
        return self._API_URL_TMPL.format(service=service, area=area, dateid=date_id)

    detail_url = try_call(build_detail_url)
    if not detail_url:
        return

    response = self._download_json(
        detail_url, episode_id, 'Downloading extended metadata', fatal=False)
    full_meta = traverse_obj(response, ('list', service, 0, {dict})) or {}
    return join_nonempty(
        'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
|
||||||
|
|
||||||
def _extract_episode_info(self, headline, programme_id, series_meta):
|
def _extract_episode_info(self, headline, programme_id, series_meta):
|
||||||
episode_id = f'{programme_id}_{headline["headline_id"]}'
|
episode_id = f'{programme_id}_{headline["headline_id"]}'
|
||||||
episode = traverse_obj(headline, ('file_list', 0, {dict}))
|
episode = traverse_obj(headline, ('file_list', 0, {dict}))
|
||||||
|
description = self._extract_extended_description(episode_id, episode)
|
||||||
|
if not description:
|
||||||
|
self.report_warning('Failed to get extended description, falling back to summary')
|
||||||
|
description = traverse_obj(episode, ('file_title_sub', {str}))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
**series_meta,
|
**series_meta,
|
||||||
@ -551,14 +571,21 @@ def _extract_episode_info(self, headline, programme_id, series_meta):
|
|||||||
'was_live': True,
|
'was_live': True,
|
||||||
'series': series_meta.get('title'),
|
'series': series_meta.get('title'),
|
||||||
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
|
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
|
||||||
|
'description': description,
|
||||||
**traverse_obj(episode, {
|
**traverse_obj(episode, {
|
||||||
'title': 'file_title',
|
'title': 'file_title',
|
||||||
'description': 'file_title_sub',
|
|
||||||
'timestamp': ('open_time', {unified_timestamp}),
|
'timestamp': ('open_time', {unified_timestamp}),
|
||||||
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
|
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_initialize(self):
    """Fetch the programme-detail API URL template once and cache it on the class.

    Does nothing when a template has already been stored. Otherwise the site
    config XML is downloaded non-fatally and the text of its
    `url_program_detail` element, prefixed with `https:`, is stored in
    `NhkRadiruIE._API_URL_TMPL`. If the value cannot be obtained the
    attribute is left as None.
    """
    if self._API_URL_TMPL:
        return

    api_config = self._download_xml(
        'https://www.nhk.or.jp/radio/config/config_web.xml', None, 'Downloading API config', fatal=False)
    # As before, try_call is relied on to yield None when the config could not
    # be downloaded or the element is absent.
    url_path = try_call(lambda: api_config.find('.//url_program_detail').text)
    # An element that exists but has no text has `.text is None`; formatting
    # that directly would cache the truthy, unusable template 'https:None'.
    NhkRadiruIE._API_URL_TMPL = f'https:{url_path}' if url_path else None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||||
programme_id = f'{site_id}_{corner_id}'
|
programme_id = f'{site_id}_{corner_id}'
|
||||||
|
Loading…
Reference in New Issue
Block a user