[ie/PrankCastPost] Add extractor (#8933)

Authored by: columndeeply
This commit is contained in:
columndeeply 2024-01-31 20:16:07 +00:00 committed by GitHub
parent 4b8b0dded8
commit a2bac6b7ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 73 additions and 2 deletions

View File

@ -1518,7 +1518,7 @@
PuhuTVSerieIE, PuhuTVSerieIE,
) )
from .pr0gramm import Pr0grammIE from .pr0gramm import Pr0grammIE
from .prankcast import PrankCastIE from .prankcast import PrankCastIE, PrankCastPostIE
from .premiershiprugby import PremiershipRugbyIE from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE from .projectveritas import ProjectVeritasIE

View File

@ -1,5 +1,8 @@
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_iso8601, traverse_obj, try_call from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
from ..utils.traversal import traverse_obj
class PrankCastIE(InfoExtractor): class PrankCastIE(InfoExtractor):
@ -64,3 +67,71 @@ def _real_extract(self, url):
'categories': [json_info.get('broadcast_category')], 'categories': [json_info.get('broadcast_category')],
'tags': try_call(lambda: json_info['broadcast_tags'].split(',')) 'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
} }
class PrankCastPostIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
_TESTS = [{
'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
'info_dict': {
'id': '6214',
'ext': 'mp3',
'title': 'Happy National Rachel Day!',
'display_id': 'happy-national-rachel-day-',
'timestamp': 1704333938,
'uploader': 'Devonanustart',
'channel_id': '4',
'duration': 13175,
'cast': ['Devonanustart'],
'description': '',
'categories': ['prank call'],
'upload_date': '20240104'
}
}, {
'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
'info_dict': {
'id': '6217',
'ext': 'mp3',
'title': 'Jake the Work Crow!',
'display_id': 'jake-the-work-crow-',
'timestamp': 1704346592,
'uploader': 'despicabledogs',
'channel_id': '957',
'duration': 263.287,
'cast': ['despicabledogs'],
'description': 'https://imgur.com/a/vtxLvKU',
'categories': [],
'upload_date': '20240104'
}
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
webpage = self._download_webpage(url, video_id)
post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
content = self._parse_json(post['post_contents_json'], video_id)[0]
uploader = post.get('user_name')
guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}
return {
'id': video_id,
'title': post.get('post_title') or self._og_search_title(webpage),
'display_id': display_id,
'url': content.get('url'),
'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
'uploader': uploader,
'channel_id': str_or_none(post.get('user_id')),
'duration': float_or_none(content.get('duration')),
'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
'description': post.get('post_body'),
'categories': list(filter(None, [content.get('category')])),
'tags': try_call(lambda: list(filter('', post['post_tags'].split(',')))),
'subtitles': {
'live_chat': [{
'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
'ext': 'json',
}],
} if post.get('content_id') else None
}