From a5c5623470e25ea6430c74517521cf5815c4c14e Mon Sep 17 00:00:00 2001
From: Matthew
Date: Wed, 3 Mar 2021 10:32:40 +0000
Subject: [PATCH] [YouTube] Use new browse API for continuation page extraction. (#131)

Known issues (these issues existed in the previous API as well):
* Mix playlists only give 1 page (25 vids)
* Trending only gives 1 video
* History gives 5 pages (200 vids)

Co-authored-by: colethedj, pukkandan
---
 yt_dlp/extractor/youtube.py | 72 +++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 4954a2d8c..7f514d35a 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2,6 +2,7 @@
 
 from __future__ import unicode_literals
 
+import hashlib
 import itertools
 import json
 import os.path
@@ -274,7 +275,7 @@ def _real_initialize(self):
         'context': {
             'client': {
                 'clientName': 'WEB',
-                'clientVersion': '2.20201021.03.00',
+                'clientVersion': '2.20210301.08.00',
             }
         },
     }
@@ -283,15 +284,28 @@ def _real_initialize(self):
     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
             if count >= retries:
                 self._downloader.report_error(last_error)
-            if not browse or not response:
+
+            if not response:
                 break
 
             known_continuation_renderers = {
@@ -2936,7 +2952,7 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
             'channel_id': metadata['uploader_id'],
             'channel_url': metadata['uploader_url']})
         return self.playlist_result(
-            self._entries(selected_tab, identity_token),
+            self._entries(selected_tab, identity_token, playlist_id),
             **metadata)
 
     def _extract_from_playlist(self, item_id, url, data, playlist):
@@ -3223,26 +3239,14 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
     _TESTS = []
 
     def _entries(self, query, n):
-        data = {
-            'context': {
-                'client': {
-                    'clientName': 'WEB',
-                    'clientVersion': '2.20201021.03.00',
-                }
-            },
-            'query': query,
-        }
+        data = {'query': query}
         if self._SEARCH_PARAMS:
             data['params'] = self._SEARCH_PARAMS
         total = 0
         for page_num in itertools.count(1):
-            search = self._download_json(
-                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
-                video_id='query "%s"' % query,
-                note='Downloading page %s' % page_num,
-                errnote='Unable to download API page', fatal=False,
-                data=json.dumps(data).encode('utf8'),
-                headers={'content-type': 'application/json'})
+            search = self._call_api(
+                ep='search', video_id='query "%s"' % query, fatal=False,
+                note='Downloading page %s' % page_num, query=data)
             if not search:
                 break
             slr_contents = try_get(
@@ -3394,8 +3398,8 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
 
 
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
-    _VALID_URL = r':ythistory'
+    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
+    _VALID_URL = r':ythis(?:tory)?'
     _FEED_NAME = 'history'
     _TESTS = [{
         'url': ':ythistory',
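
For reference, a minimal standalone sketch of the kind of continuation request this
patch moves extraction onto. The browse endpoint, the public API key and the WEB
client context are the ones visible in the diff above; the payload shape, the helper
name and the urllib-based transport are illustrative assumptions, not the extractor's
implementation.

# Illustrative sketch only -- not the extractor code.
# Fetches one continuation page from the InnerTube browse endpoint.
import json
import urllib.request

# Public WEB client API key, as seen in the diff above.
_INNERTUBE_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'


def fetch_browse_continuation(continuation_token):
    """POST a continuation token to youtubei/v1/browse and return the parsed JSON."""
    payload = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210301.08.00',
            },
        },
        # Assumed request field for continuation pages; illustrative only.
        'continuation': continuation_token,
    }
    request = urllib.request.Request(
        'https://www.youtube.com/youtubei/v1/browse?key=%s' % _INNERTUBE_KEY,
        data=json.dumps(payload).encode('utf-8'),
        headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read().decode('utf-8'))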