[ie/radiko] Extract more metadata (#9115)

Authored by: YoshichikaAAA
This commit is contained in:
YoshichikaAAA 2024-02-04 03:44:17 +09:00 committed by GitHub
parent 4253e3b7f4
commit e3ce2b385e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,6 @@
import base64
import random
import re
import urllib.parse
from .common import InfoExtractor
@ -11,6 +12,7 @@
unified_timestamp,
update_url_query,
)
from ..utils.traversal import traverse_obj
class RadikoBaseIE(InfoExtractor):
@ -159,6 +161,12 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token,
return formats
def _extract_performers(self, prog):
    """Return the programme's performers as one comma-separated string.

    Collects the text of every ``pfm`` node under ``prog`` (presumably an
    ElementTree element, as callers also use ``prog.find(...)``), splits each
    text on the separator characters in the pattern below, and strips
    surrounding whitespace from every resulting name.

    Returns ``None`` when no non-empty performer name is found.
    """
    performers = traverse_obj(prog, (
        'pfm/text()', ..., {lambda x: re.split(r'[//、 ,]', x)}, ..., {str.strip}))
    # Adjacent separators (e.g. a comma followed by a space) make re.split
    # emit empty strings, and the traversal keeps them; drop them here so the
    # joined result never contains ", , " gaps or leading/trailing commas.
    # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
    return ', '.join(filter(None, performers or ())) or None
class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@ -186,10 +194,12 @@ def _real_extract(self, url):
return {
'id': video_id,
'title': try_call(lambda: prog.find('title').text),
'artist': self._extract_performers(prog),
'description': clean_html(try_call(lambda: prog.find('info').text)),
'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station,
'timestamp': vid_int,
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
'is_live': True,
'formats': self._extract_formats(
video_id=video_id, station=station, is_onair=False,
@ -243,6 +253,7 @@ def _real_extract(self, url):
return {
'id': station,
'title': title,
'artist': self._extract_performers(prog),
'description': description,
'uploader': station_name,
'uploader_id': station,