From 12f153a8275bd4c05aee1532b3eb00f1361c4636 Mon Sep 17 00:00:00 2001 From: Locke Date: Tue, 4 Oct 2022 10:59:05 +0800 Subject: [PATCH] [extractor/BilibiliSpace] Fix extractor, better error message (#5043) Closes #5038 Authored by: lockmatrix --- yt_dlp/extractor/bilibili.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2e03aee85..5a5c79f29 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -4,6 +4,7 @@ import functools import math import re +import urllib from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -508,11 +509,11 @@ def _real_extract(self, url): class BilibiliSpaceBaseIE(InfoExtractor): def _extract_playlist(self, fetch_page, get_metadata, get_entries): - first_page = fetch_page(1) + first_page = fetch_page(0) metadata = get_metadata(first_page) paged_list = InAdvancePagedList( - lambda idx: get_entries(fetch_page(idx) if idx > 1 else first_page), + lambda idx: get_entries(fetch_page(idx) if idx else first_page), metadata['page_count'], metadata['page_size']) return metadata, paged_list @@ -535,10 +536,19 @@ def _real_extract(self, url): 'To download audios, add a "/audio" to the URL') def fetch_page(page_idx): - return self._download_json( - 'https://api.bilibili.com/x/space/arc/search', playlist_id, - note=f'Downloading page {page_idx}', - query={'mid': playlist_id, 'pn': page_idx, 'jsonp': 'jsonp'})['data'] + try: + response = self._download_json('https://api.bilibili.com/x/space/arc/search', + playlist_id, note=f'Downloading page {page_idx}', + query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'}) + except ExtractorError as e: + if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412: + raise ExtractorError( + 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True) + raise + if response['code'] == -401: + raise ExtractorError( + 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True) + return response['data'] def get_metadata(page_data): page_size = page_data['page']['ps'] @@ -573,7 +583,7 @@ def fetch_page(page_idx): return self._download_json( 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id, note=f'Downloading page {page_idx}', - query={'uid': playlist_id, 'pn': page_idx, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data'] + query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data'] def get_metadata(page_data): return { @@ -608,7 +618,7 @@ def fetch_page(page_idx): return self._download_json( 'https://api.bilibili.com/x/polymer/space/seasons_archives_list', playlist_id, note=f'Downloading page {page_idx}', - query={'mid': mid, 'season_id': sid, 'page_num': page_idx, 'page_size': 30})['data'] + query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data'] def get_metadata(page_data): page_size = page_data['page']['page_size']