From 50ac0e5416e0bdff21241852010cad4927e898d6 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 28 Aug 2022 22:59:54 +0000 Subject: [PATCH] [extractor/youtube] Use device-specific user agent (#4770) Thwart latest fingerprinting attempt (see https://github.com/iv-org/invidious/issues/3230#issuecomment-1226887639) Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 44 ++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 38e5faa794..f55a2760ff 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -110,8 +110,9 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID', - 'clientVersion': '17.29.34', - 'androidSdkVersion': 30 + 'clientVersion': '17.31.35', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip' } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, @@ -122,8 +123,9 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_EMBEDDED_PLAYER', - 'clientVersion': '17.29.34', - 'androidSdkVersion': 30 + 'clientVersion': '17.31.35', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip' }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, @@ -135,7 +137,8 @@ 'client': { 'clientName': 'ANDROID_MUSIC', 'clientVersion': '5.16.51', - 'androidSdkVersion': 30 + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip' } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, @@ -146,8 +149,9 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_CREATOR', - 'clientVersion': '22.28.100', - 'androidSdkVersion': 30 + 'clientVersion': '22.30.100', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip' }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, @@ -162,6 +166,7 @@ 'clientName': 'IOS', 'clientVersion': '17.30.1', 'deviceModel': 'iPhone14,3', + 'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, @@ -173,6 +178,7 @@ 'clientName': 'IOS_MESSAGES_EXTENSION', 'clientVersion': '17.30.1', 'deviceModel': 'iPhone14,3', + 'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 66, @@ -555,7 +561,8 @@ def generate_api_headers( 'Origin': origin, 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), - 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg) + 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), + 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client) } if session_index is None: session_index = self._extract_session_index(ytcfg) @@ -3071,7 +3078,9 @@ def _is_agegated(player_response): def _is_unplayable(player_response): return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE' - def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr): + _STORY_PLAYER_PARAMS = '8AEB' + + def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data): session_index = self._extract_session_index(player_ytcfg, master_ytcfg) syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr) @@ -3081,8 +3090,10 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, yt_query = { 'videoId': video_id, - 'params': '8AEB' # enable stories } + if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android': + yt_query['params'] = self._STORY_PLAYER_PARAMS + yt_query.update(self._generate_player_context(sts)) return self._extract_response( item_id=video_id, ep='player', query=yt_query, @@ -3115,7 +3126,7 @@ def _get_requested_clients(self, url, smuggled_data): return orderedSet(requested_clients) - def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): + def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): initial_pr = None if webpage: initial_pr = self._search_json( @@ -3165,7 +3176,7 @@ def append_client(*client_names): try: pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( - client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr) + client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) except ExtractorError as e: if last_error: self.report_warning(last_error) @@ -3428,14 +3439,17 @@ def _extract_storyboard(self, player_responses, duration): def _download_player_responses(self, url, smuggled_data, video_id, webpage_url): webpage = None if 'webpage' not in self._configuration_arg('player_skip'): + query = {'bpctr': '9999999999', 'has_verified': '1'} + if smuggled_data.get('is_story'): + query['pp'] = self._STORY_PLAYER_PARAMS webpage = self._download_webpage( - webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False) + webpage_url, video_id, fatal=False, query=query) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() player_responses, player_url = self._extract_player_responses( self._get_requested_clients(url, smuggled_data), - video_id, webpage, master_ytcfg) + video_id, webpage, master_ytcfg, smuggled_data) return webpage, master_ytcfg, player_responses, player_url @@ -6008,7 +6022,7 @@ class YoutubeStoriesIE(InfoExtractor): def _real_extract(self, url): playlist_id = f'RLTD{self._match_id(url)}' return self.url_result( - f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', + smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}), ie=YoutubeTabIE, video_id=playlist_id)