From bf7392922f801227b95103925e9df3ccc764a000 Mon Sep 17 00:00:00 2001 From: lkho Date: Sun, 30 Aug 2020 15:53:07 +0800 Subject: [PATCH] [duboku] fix list results, minor error checking --- youtube_dl/extractor/duboku.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/duboku.py b/youtube_dl/extractor/duboku.py index cd92f5cf1..136ee392e 100644 --- a/youtube_dl/extractor/duboku.py +++ b/youtube_dl/extractor/duboku.py @@ -101,7 +101,7 @@ def _real_extract(self, url): player_data = self._search_regex( self._PLAYER_DATA_PATTERN, webpage_html, 'player_data') - player_data = self._parse_json(js_to_json(player_data), video_id) + player_data = self._parse_json(player_data, video_id, js_to_json) # extract title @@ -121,8 +121,9 @@ def _real_extract(self, url): title = re.sub(r'[\s\r\n\t]+', ' ', title) break - data_url = player_data['url'] - assert data_url + data_url = player_data.get('url') + if not data_url: + raise ExtractorError('Cannot find url in player_data') data_from = player_data.get('from') # if it is an embedded iframe, maybe it's an external source @@ -225,7 +226,7 @@ def _real_extract(self, url): playlist = playlists.get(fragment) playlist_id = fragment else: - first = next(iter(playlists.items())) + first = next(iter(playlists.items()), None) if first: (playlist_id, playlist) = first if not playlist: @@ -235,5 +236,6 @@ def _real_extract(self, url): # return url results return self.playlist_result([ self.url_result( - 'https://www.duboku.co' + x['href'], DubokuIE.IE_NAME, video_title=x.get('title')) + compat_urlparse.urljoin('https://www.duboku.co', x['href']), + ie=DubokuIE.ie_key(), video_title=x.get('title')) for x in playlist], series_id + '#' + playlist_id, title)