From 628fa244bbce2ad39775a5959e99588f30cac152 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Sun, 26 Nov 2023 03:20:10 +0100 Subject: [PATCH] [ie/floatplane] Add extractors (#8639) Closes #5877, Closes #5912 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/floatplane.py | 268 ++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 yt_dlp/extractor/floatplane.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 128b86c1a..ad8c7d661 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -642,6 +642,10 @@ from .firsttv import FirstTVIE from .fivetv import FiveTVIE from .flickr import FlickrIE +from .floatplane import ( + FloatplaneIE, + FloatplaneChannelIE, +) from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE diff --git a/yt_dlp/extractor/floatplane.py b/yt_dlp/extractor/floatplane.py new file mode 100644 index 000000000..09abb40bf --- /dev/null +++ b/yt_dlp/extractor/floatplane.py @@ -0,0 +1,268 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + OnDemandPagedList, + clean_html, + determine_ext, + format_field, + int_or_none, + join_nonempty, + parse_codecs, + parse_iso8601, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class FloatplaneIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P\w+)' + _TESTS = [{ + 'url': 'https://www.floatplane.com/post/2Yf3UedF7C', + 'info_dict': { + 'id': 'yuleLogLTT', + 'ext': 'mp4', + 'display_id': '2Yf3UedF7C', + 'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours', + 'description': 'md5:adf2970e0de1c5e3df447818bb0309f6', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'duration': 36035, + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_date': '20191206', + 'release_timestamp': 1575657000, + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': 'Linus Tech Tips', + 'channel_id': '63fe42c309e691e4e36de93d', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main', + 'availability': 'subscriber_only', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.floatplane.com/post/j2jqG3JmgJ', + 'info_dict': { + 'id': 'j2jqG3JmgJ', + 'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?', + 'description': 'md5:00bf17dc5733e4031e99b7fd6489f274', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_timestamp': 1671915900, + 'release_date': '20221224', + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': "They're Just Movies", + 'channel_id': '64135f82fc76ab7f9fbdc876', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm', + 'availability': 'subscriber_only', + }, + 'playlist_count': 2, + }, { + 'url': 'https://www.floatplane.com/post/3tK2tInhoN', + 'info_dict': { + 'id': '3tK2tInhoN', + 'title': 'Extras - How Linus Communicates with Editors (Compensator 4)', + 'description': 'md5:83cd40aae1ce124df33769600c80ca5b', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_timestamp': 1700529120, + 'release_date': '20231121', + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': 'FP Exclusives', + 'channel_id': '6413623f5b12cca228a28e78', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive', + 'availability': 'subscriber_only', + }, + 'playlist_count': 2, + }, { + 'url': 'https://beta.floatplane.com/post/d870PEFXS1', + 'info_dict': { + 'id': 'bg9SuYKEww', + 'ext': 'mp4', + 'display_id': 'd870PEFXS1', + 'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!', + 'description': 'md5:80d612dcabf41b17487afcbe303ec57d', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'release_timestamp': 1700622000, + 'release_date': '20231122', + 'duration': 513, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': 'GameLinked', + 'channel_id': '649dbade3540dbc3945eeda7', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked', + 'availability': 'subscriber_only', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_initialize(self): + if not self._get_cookies('https://www.floatplane.com').get('sails.sid'): + self.raise_login_required() + + def _real_extract(self, url): + post_id = self._match_id(url) + + post_data = self._download_json( + 'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id}, + note='Downloading post data', errnote='Unable to download post data') + + if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): + raise ExtractorError('Post does not contain a video or audio track', expected=True) + + items = [] + for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): + media_id = media['id'] + media_typ = media.get('type') or 'video' + + metadata = self._download_json( + f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id}, + note=f'Downloading {media_typ} metadata') + + stream = self._download_json( + 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={ + 'type': 'vod' if media_typ == 'video' else 'aod', + 'guid': metadata['guid'] + }, note=f'Downloading {media_typ} stream data') + + path_template = traverse_obj(stream, ('resource', 'uri', {str})) + + def format_path(params): + path = path_template + for i, val in (params or {}).items(): + path = path.replace(f'{{qualityLevelParams.{i}}}', val) + return path + + formats = [] + for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): + url = urljoin(stream['cdn'], format_path(traverse_obj( + stream, ('resource', 'data', 'qualityLevelParams', quality['name'])))) + formats.append({ + **traverse_obj(quality, { + 'format_id': 'name', + 'format_note': 'label', + 'width': ('width', {int}), + 'height': ('height', {int}), + }), + **parse_codecs(quality.get('codecs')), + 'url': url, + 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'), + }) + + items.append({ + 'id': media_id, + **traverse_obj(metadata, { + 'title': 'title', + 'duration': ('duration', {int_or_none}), + 'thumbnail': ('thumbnail', 'path'), + }), + 'formats': formats, + }) + + uploader_url = format_field(traverse_obj( + post_data, 'creator'), 'urlname', 'https://www.floatplane.com/channel/%s/home', default=None) + channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))) + + post_info = { + 'id': post_id, + 'display_id': post_id, + **traverse_obj(post_data, { + 'title': 'title', + 'description': ('text', {clean_html}), + 'uploader': ('creator', 'title'), + 'uploader_id': ('creator', 'id'), + 'channel': ('channel', 'title'), + 'channel_id': ('channel', 'id'), + 'like_count': ('likes', {int_or_none}), + 'dislike_count': ('dislikes', {int_or_none}), + 'comment_count': ('comments', {int_or_none}), + 'release_timestamp': ('releaseDate', {parse_iso8601}), + 'thumbnail': ('thumbnail', 'path'), + }), + 'uploader_url': uploader_url, + 'channel_url': channel_url, + 'availability': self._availability(needs_subscription=True), + } + + if len(items) > 1: + return self.playlist_result(items, **post_info) + + post_info.update(items[0]) + return post_info + + +class FloatplaneChannelIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P[\w-]+)/home(?:/(?P[\w-]+))?' + _PAGE_SIZE = 20 + _TESTS = [{ + 'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo', + 'info_dict': { + 'id': 'linustechtips/ltxexpo', + 'title': 'LTX Expo', + 'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149', + }, + 'playlist_mincount': 51, + }, { + 'url': 'https://www.floatplane.com/channel/ShankMods/home', + 'info_dict': { + 'id': 'ShankMods', + 'title': 'Shank Mods', + 'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30', + }, + 'playlist_mincount': 14, + }, { + 'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home', + 'info_dict': { + 'id': 'bitwit_ultra', + 'title': 'Bitwit Ultra', + 'description': 'md5:1452f280bb45962976d4789200f676dd', + }, + 'playlist_mincount': 200, + }] + + def _fetch_page(self, display_id, creator_id, channel_id, page): + query = { + 'id': creator_id, + 'limit': self._PAGE_SIZE, + 'fetchAfter': page * self._PAGE_SIZE, + } + if channel_id: + query['channel'] = channel_id + page_data = self._download_json( + 'https://www.floatplane.com/api/v3/content/creator', display_id, + query=query, note=f'Downloading page {page + 1}') + for post in page_data or []: + yield self.url_result( + f'https://www.floatplane.com/post/{post["id"]}', + ie=FloatplaneIE, video_id=post['id'], video_title=post.get('title'), + release_timestamp=parse_iso8601(post.get('releaseDate'))) + + def _real_extract(self, url): + creator, channel = self._match_valid_url(url).group('id', 'channel') + display_id = join_nonempty(creator, channel, delim='/') + + creator_data = self._download_json( + 'https://www.floatplane.com/api/v3/creator/named', + display_id, query={'creatorURL[0]': creator})[0] + + channel_data = traverse_obj( + creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {} + + return self.playlist_result(OnDemandPagedList(functools.partial( + self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE), + display_id, playlist_title=channel_data.get('title') or creator_data.get('title'), + playlist_description=channel_data.get('about') or creator_data.get('about'))