From a31d0fa6c315b1145d682361149003d98f1e3782 Mon Sep 17 00:00:00 2001
From: "lauren n. liberda"
Date: Sun, 12 Feb 2023 05:43:10 +0100
Subject: [PATCH] [extractor/tvp] Support `stream.tvp.pl` (#6139)

Authored by: selfisekai
---
Review notes (this area is ignored when the patch is applied):

* This patch was recovered from a copy whose line breaks and indentation
  had been collapsed and whose `<...>` spans had been stripped. The
  `(?P<id>...)` group name in both `_VALID_URL` lines and the `<div\s[^>`
  prefix of the removed `_PLAYER_BOX_RE` / `_BUTTON_RE` lines are
  reconstructions, as is the leading whitespace of every line; verify the
  removed lines against the pre-image before applying. The address in the
  `From:` header was lost the same way and has not been restored.
* `'...channel_id=%s' % channel_id or 'default'` parses as
  `('...channel_id=%s' % channel_id) or 'default'`. The formatted string is
  never empty, so `'default'` is unreachable and an empty id requests
  `?channel_id=`. The removed line has the same shape, so this patch does
  not change that behaviour.
* `audition['video_id']`, `audition.get(...)` and `channel.get(...)` assume
  both `traverse_obj(..., get_all=False)` lookups found a match. Presumably
  they yield None otherwise (confirm against `traverse_obj`), in which case
  an unknown channel id or a channel without a live item fails with a
  TypeError/AttributeError instead of an extractor error.

 yt_dlp/extractor/tvp.py | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
index 8483564f7..f8ded2646 100644
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -268,8 +268,11 @@ def _real_extract(self, url):
 
 class TVPStreamIE(InfoExtractor):
     IE_NAME = 'tvp:stream'
-    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
+    _VALID_URL = r'(?:tvpstream:|https?://(?:tvpstream\.vod|stream)\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
     _TESTS = [{
+        'url': 'https://stream.tvp.pl/?channel_id=56969941',
+        'only_matching': True,
+    }, {
         # untestable as "video" id changes many times across a day
         'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
         'only_matching': True,
@@ -285,28 +288,21 @@ class TVPStreamIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
-    _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'
-
     def _real_extract(self, url):
         channel_id = self._match_id(url)
-        channel_url = self._proto_relative_url('//tvpstream.vod.tvp.pl/?channel_id=%s' % channel_id or 'default')
-        webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage')
-        if not channel_id:
-            channel_id = self._search_regex(self._PLAYER_BOX_RE % 'channel',
-                                            webpage, 'default channel id')
-        video_id = self._search_regex(self._PLAYER_BOX_RE % 'video',
-                                      webpage, 'video id')
-        audition_title, station_name = self._search_regex(
-            self._BUTTON_RE % (re.escape(channel_id)), webpage,
-            'audition title and station name',
-            group=(1, 2))
+        channel_url = self._proto_relative_url('//stream.tvp.pl/?channel_id=%s' % channel_id or 'default')
+        webpage = self._download_webpage(channel_url, channel_id or 'default', 'Downloading channel webpage')
+        channels = self._search_json(
+            r'window\.__channels\s*=', webpage, 'channel list', channel_id,
+            contains_pattern=r'\[\s*{(?s:.+)}\s*]')
+        channel = traverse_obj(channels, (lambda _, v: channel_id == str(v['id'])), get_all=False) if channel_id else channels[0]
+        audition = traverse_obj(channel, ('items', lambda _, v: v['is_live'] is True), get_all=False)
         return {
             '_type': 'url_transparent',
-            'id': channel_id,
-            'url': 'tvp:%s' % video_id,
-            'title': audition_title,
-            'alt_title': station_name,
+            'id': channel_id or channel['id'],
+            'url': 'tvp:%s' % audition['video_id'],
+            'title': audition.get('title'),
+            'alt_title': channel.get('title'),
             'is_live': True,
             'ie_key': 'TVPEmbed',
         }