From 34859e4b32a7c2c74a54c6734678e8513885da43 Mon Sep 17 00:00:00 2001
From: coletdjnz
Date: Tue, 4 Oct 2022 17:14:57 +1300
Subject: [PATCH] [extractor/onenewsnz] Add extractor (#5088)

Authored by: coletdjnz
---
Reviewer notes (commentary between the "---" marker and the first diff is
not part of the commit):

* OneNewsNZIE matches http(s) article URLs on 1news.co.nz / onenews.co.nz
  (optionally prefixed with www.) of the form
  /<digits>/<digits>/<digits>/<slug>; the slug is captured as the "id"
  group and is used as the playlist id.
* _real_extract() downloads the article, reads the JSON assigned to
  Fusion.globalContent and walks its "content_elements" list:
    - subtype "video": a Brightcove player URL is built from
      embed.config.brightcoveAccount (falling back to 963482464001) and
      embed.config.brightcoveVideoId, then delegated to BrightcoveNewIE;
    - subtype "youtube": the value found at referent.id / raw_oembed._id
      is delegated to the Youtube extractor when it is truthy.
* An article that yields no entries raises an expected ExtractorError.
* The playlist title is headlines.basic, falling back to
  _og_search_title() and then _html_extract_title().
* NOTE(review): brightcoveVideoId is interpolated without a presence check
  (unlike the YouTube branch); presumably a missing id would produce a
  "videoId=None" URL -- confirm whether such items can occur.

 yt_dlp/extractor/_extractors.py |   1 +
 yt_dlp/extractor/onenewsnz.py   | 112 ++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 yt_dlp/extractor/onenewsnz.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 3ecd7748b..44c189f79 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1234,6 +1234,7 @@
 from .on24 import On24IE
 from .ondemandkorea import OnDemandKoreaIE
 from .onefootball import OneFootballIE
+from .onenewsnz import OneNewsNZIE
 from .onet import (
     OnetIE,
     OnetChannelIE,
diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py
new file mode 100644
index 000000000..59d4490d0
--- /dev/null
+++ b/yt_dlp/extractor/onenewsnz.py
@@ -0,0 +1,112 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+from ..utils import (
+    ExtractorError,
+    traverse_obj
+)
+
+
+class OneNewsNZIE(InfoExtractor):
+    IE_NAME = '1News'
+    IE_DESC = '1news.co.nz article videos'
+    _VALID_URL = r'https?://(?:www\.)?(?:1|one)news\.co\.nz/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
+    _TESTS = [
+        {   # Brightcove video
+            'url': 'https://www.1news.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
+            'info_dict': {
+                'id': 'cows-painted-green-on-parliament-lawn-in-climate-protest',
+                'title': '\'Cows\' painted green on Parliament lawn in climate protest',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'id': '6312993358112',
+                    'title': 'Activists dressed as cows painted green outside Parliament in climate protest',
+                    'ext': 'mp4',
+                    'tags': 'count:6',
+                    'uploader_id': '963482464001',
+                    'timestamp': 1664416255,
+                    'upload_date': '20220929',
+                    'duration': 38.272,
+                    'thumbnail': r're:^https?://.*\.jpg$',
+                    'description': 'Greenpeace accused the Government of "greenwashing" instead of taking climate action.',
+                }
+            }]
+        }, {
+            # YouTube video
+            'url': 'https://www.1news.co.nz/2022/09/30/now-is-the-time-to-care-about-womens-rugby/',
+            'info_dict': {
+                'id': 'now-is-the-time-to-care-about-womens-rugby',
+                'title': 'Now is the time to care about women\'s rugby',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'id': 's4wEB9neTfU',
+                    'title': 'Why I love women’s rugby: Black Fern Ruahei Demant',
+                    'ext': 'mp4',
+                    'channel_follower_count': int,
+                    'channel_url': 'https://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
+                    'tags': 'count:12',
+                    'uploader': 'Re: News',
+                    'upload_date': '20211215',
+                    'uploader_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
+                    'uploader_url': 'http://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
+                    'channel_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
+                    'channel': 'Re: News',
+                    'like_count': int,
+                    'thumbnail': 'https://i.ytimg.com/vi/s4wEB9neTfU/maxresdefault.jpg',
+                    'age_limit': 0,
+                    'view_count': int,
+                    'categories': ['Sports'],
+                    'duration': 222,
+                    'description': 'md5:8874410e5740ed1d8fd0df839f849813',
+                    'availability': 'public',
+                    'playable_in_embed': True,
+                    'live_status': 'not_live',
+                }
+            }]
+        }, {
+            # 2 Brightcove videos
+            'url': 'https://www.1news.co.nz/2022/09/29/raw-videos-capture-hurricane-ians-fury-as-it-slams-florida/',
+            'info_dict': {
+                'id': 'raw-videos-capture-hurricane-ians-fury-as-it-slams-florida',
+                'title': 'Raw videos capture Hurricane Ian\'s fury as it slams Florida',
+            },
+            'playlist_mincount': 2,
+        }, {
+            'url': 'https://www.onenews.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
+            'only_matching': True,
+        }]
+
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/0xpHIR6IB_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        fusion_metadata = self._search_json(r'Fusion\.globalContent\s*=', webpage, 'fusion metadata', display_id)
+
+        entries = []
+        for item in traverse_obj(fusion_metadata, 'content_elements') or []:
+            item_type = traverse_obj(item, 'subtype')
+            if item_type == 'video':
+                brightcove_config = traverse_obj(item, ('embed', 'config'))
+                brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
+                    traverse_obj(brightcove_config, 'brightcoveAccount') or '963482464001',
+                    traverse_obj(brightcove_config, 'brightcoveVideoId')
+                )
+                entries.append(self.url_result(brightcove_url, BrightcoveNewIE))
+            elif item_type == 'youtube':
+                video_id_or_url = traverse_obj(item, ('referent', 'id'), ('raw_oembed', '_id'))
+                if video_id_or_url:
+                    entries.append(self.url_result(video_id_or_url, ie='Youtube'))
+
+        if not entries:
+            raise ExtractorError('This article does not have a video.', expected=True)
+
+        playlist_title = (
+            traverse_obj(fusion_metadata, ('headlines', 'basic'))
+            or self._og_search_title(webpage)
+            or self._html_extract_title(webpage)
+        )
+        return self.playlist_result(entries, display_id, playlist_title)