From 8fe514d382808dfa9e5c9a950ef45b4545fd7470 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Fri, 21 Jan 2022 11:25:55 +0530 Subject: [PATCH] [CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 --- yt_dlp/extractor/crowdbunker.py | 113 ++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 4 ++ 2 files changed, 117 insertions(+) create mode 100644 yt_dlp/extractor/crowdbunker.py diff --git a/yt_dlp/extractor/crowdbunker.py b/yt_dlp/extractor/crowdbunker.py new file mode 100644 index 000000000..72906afef --- /dev/null +++ b/yt_dlp/extractor/crowdbunker.py @@ -0,0 +1,113 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_strdate, +) + + +class CrowdBunkerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P[^/?#$&]+)' + + _TESTS = [{ + 'url': 'https://crowdbunker.com/v/0z4Kms8pi8I', + 'info_dict': { + 'id': '0z4Kms8pi8I', + 'ext': 'mp4', + 'title': '117) Pass vax et solutions', + 'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c', + 'view_count': int, + 'duration': 5386, + 'uploader': 'Jérémie Mercier', + 'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ', + 'like_count': int, + 'upload_date': '20211218', + 'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://api.divulg.org/post/{id}/details', + id, headers={'accept': 'application/json, text/plain, */*'}) + video_json = data_json['video'] + formats, subtitles = [], {} + for sub in video_json.get('captions') or []: + sub_url = try_get(sub, lambda x: x['file']['url']) + if not sub_url: + continue + subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({ + 'url': sub_url, + }) + + mpd_url = try_get(video_json, lambda x: x['dashManifest']['url']) + if mpd_url: + fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, id) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url']) + if m3u8_url: + fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, id) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + + thumbnails = [{ + 'url': image['url'], + 'height': int_or_none(image.get('height')), + 'width': int_or_none(image.get('width')), + } for image in video_json.get('thumbnails') or [] if image.get('url')] + + self._sort_formats(formats) + return { + 'id': id, + 'title': video_json.get('title'), + 'description': video_json.get('description'), + 'view_count': video_json.get('viewCount'), + 'duration': video_json.get('duration'), + 'uploader': try_get(data_json, lambda x: x['channel']['name']), + 'uploader_id': try_get(data_json, lambda x: x['channel']['id']), + 'like_count': data_json.get('likesCount'), + 'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')), + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + } + + +class CrowdBunkerChannelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P[^/?#$&]+)' + + _TESTS = [{ + 'url': 'https://crowdbunker.com/@Milan_UHRIN', + 'playlist_mincount': 14, + 'info_dict': { + 'id': 'Milan_UHRIN', + }, + }] + + def _entries(self, id): + last = None + + for page in itertools.count(): + channel_json = self._download_json( + f'https://api.divulg.org/organization/{id}/posts', id, headers={'accept': 'application/json, text/plain, */*'}, + query={'after': last} if last else {}, note=f'Downloading Page {page}') + for item in channel_json.get('items') or []: + v_id = item.get('uid') + if not v_id: + continue + yield self.url_result( + 'https://crowdbunker.com/v/%s' % v_id, ie=CrowdBunkerIE.ie_key(), video_id=v_id) + last = channel_json.get('last') + if not last: + break + + def _real_extract(self, url): + id = self._match_id(url) + return self.playlist_result(self._entries(id), playlist_id=id) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2d707a575..d659d7a8e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -304,6 +304,10 @@ from .cracked import CrackedIE from .crackle import CrackleIE from .crooksandliars import CrooksAndLiarsIE +from .crowdbunker import ( + CrowdBunkerIE, + CrowdBunkerChannelIE, +) from .crunchyroll import ( CrunchyrollIE, CrunchyrollShowPlaylistIE,