From ad54c9130e793ce433bf9da334fa80df9f3aee58 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 21 Jun 2023 09:21:20 +0530 Subject: [PATCH] [cleanup] Misc Closes #6288, Closes #7197, Closes #7265, Closes #7353, Closes #5773 Authored by: mikf, freezboltz, pukkandan --- .github/workflows/potential-duplicates.yml | 2 +- README.md | 28 +++++---- devscripts/changelog_override.json | 27 +++++++++ devscripts/cli_to_api.py | 4 +- devscripts/make_changelog.py | 14 ++--- test/test_YoutubeDL.py | 16 +++--- test/test_jsinterp.py | 67 ++++++++++++---------- test/test_youtube_signature.py | 2 +- yt_dlp/YoutubeDL.py | 20 ++++--- yt_dlp/cookies.py | 4 ++ yt_dlp/downloader/common.py | 1 - yt_dlp/downloader/niconico.py | 4 +- yt_dlp/extractor/ciscowebex.py | 4 +- yt_dlp/extractor/common.py | 3 +- yt_dlp/extractor/dumpert.py | 0 yt_dlp/extractor/globalplayer.py | 0 yt_dlp/extractor/odnoklassniki.py | 6 +- yt_dlp/extractor/tvp.py | 4 +- yt_dlp/extractor/vidio.py | 2 +- yt_dlp/extractor/youtube.py | 10 ++-- yt_dlp/options.py | 6 +- yt_dlp/utils/_legacy.py | 6 +- yt_dlp/utils/_utils.py | 10 +--- 23 files changed, 138 insertions(+), 102 deletions(-) mode change 100755 => 100644 yt_dlp/extractor/dumpert.py mode change 100755 => 100644 yt_dlp/extractor/globalplayer.py diff --git a/.github/workflows/potential-duplicates.yml b/.github/workflows/potential-duplicates.yml index 1521ae20c0..cfc5831864 100644 --- a/.github/workflows/potential-duplicates.yml +++ b/.github/workflows/potential-duplicates.yml @@ -12,7 +12,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} label: potential-duplicate state: all - threshold: 0.7 + threshold: 0.3 comment: | This issue is potentially a duplicate of one of the following issues: {{#issues}} diff --git a/README.md b/README.md index 9a00da9035..d9a5e6cefc 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ ### Differences in default behavior * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date. * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead -* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this +* Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior @@ -251,7 +251,7 @@ #### Misc ``` -**Note**: The manpages, shell completion files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) +**Note**: The manpages, shell completion (autocomplete) files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) ## DEPENDENCIES Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. @@ -699,9 +699,8 @@ ## Filesystem Options: --write-description etc. (default) --no-write-playlist-metafiles Do not write playlist metadata when using --write-info-json, --write-description etc. - --clean-info-json Remove some private fields such as filenames - from the infojson. Note that it could still - contain some personal information (default) + --clean-info-json Remove some internal metadata such as + filenames from the infojson (default) --no-clean-info-json Write all fields to the infojson --write-comments Retrieve video comments to be placed in the infojson. The comments are fetched even @@ -1041,13 +1040,10 @@ ## Post-Processing Options: that of --use-postprocessor (default: after_move). Same syntax as the output template can be used to pass any field as - arguments to the command. After download, an - additional field "filepath" that contains - the final path of the downloaded file is - also available, and if no fields are passed, - %(filepath,_filename|)q is appended to the - end of the command. This option can be used - multiple times + arguments to the command. If no fields are + passed, %(filepath,_filename|)q is appended + to the end of the command. This option can + be used multiple times --no-exec Remove any previously defined --exec --convert-subs FORMAT Convert the subtitles to another format (currently supported: ass, lrc, srt, vtt) @@ -1225,8 +1221,7 @@ ### Authentication with netrc The default location of the .netrc file is `~` (see below). -As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter, it shall output the credentials in the netrc format and return `0` on success, other values will be treated as an error. `{}` in the command will be replaced by the name of the extractor to make it possible to select the credentials for the right extractor. -To use braces in the command, they need to be escaped by doubling them. (see example bellow) +As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter, it shall output the credentials in the netrc format and return `0` on success, other values will be treated as an error. `{}` in the command will be replaced by the name of the extractor to make it possible to select the credentials for the right extractor (To use literal braces, double them like `{{}}`). E.g. To use an encrypted `.netrc` file stored as `.authinfo.gpg` ``` @@ -1389,7 +1384,10 @@ # OUTPUT TEMPLATE - `subtitles_table` (table): The subtitle format table as printed by `--list-subs` - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs` + Available only after the video is downloaded (`post_process`/`after_move`): + - `filepath`: Actual path of downloaded video file + Available only in `--sponsorblock-chapter-title`: - `start_time` (numeric): Start time of the chapter in seconds @@ -1435,7 +1433,7 @@ # Download YouTube playlist videos in separate directories according to their up $ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" # Prefix playlist index with " - " separator, but only if it is available -$ yt-dlp -o '%(playlist_index|)s%(playlist_index& - |)s%(title)s.%(ext)s' BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists" +$ yt-dlp -o "%(playlist_index&{} - |)s%(title)s.%(ext)s" BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists" # Download all playlists of YouTube channel/user keeping each playlist in separate directory: $ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists" diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index e5c9d1aa21..73225bdb90 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -8,5 +8,32 @@ "action": "add", "when": "776d1c3f0c9b00399896dd2e40e78e9a43218109", "short": "[priority] **YouTube throttling fixes!**" + }, + { + "action": "remove", + "when": "2e023649ea4e11151545a34dc1360c114981a236" + }, + { + "action": "add", + "when": "01aba2519a0884ef17d5f85608dbd2a455577147", + "short": "[priority] YouTube: Improved throttling and signature fixes" + }, + { + "action": "change", + "when": "c86e433c35fe5da6cb29f3539eef97497f84ed38", + "short": "[extractor/niconico:series] Fix extraction (#6898)", + "authors": ["sqrtNOT"] + }, + { + "action": "change", + "when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2", + "short": "[extractor/youtube:music_search_url] Extract title (#7102)", + "authors": ["kangalio"] + }, + { + "action": "change", + "when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb", + "short": "Add option `--color` (#6904)", + "authors": ["Grub4K"] } ] diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py index b8b7cbcf1d..2aa51eb6e9 100644 --- a/devscripts/cli_to_api.py +++ b/devscripts/cli_to_api.py @@ -19,11 +19,11 @@ def parse_patched_options(opts): 'extract_flat': False, 'concat_playlist': 'never', }) - yt_dlp.options.__dict__['create_parser'] = lambda: patched_parser + yt_dlp.options.create_parser = lambda: patched_parser try: return yt_dlp.parse_options(opts) finally: - yt_dlp.options.__dict__['create_parser'] = create_parser + yt_dlp.options.create_parser = create_parser default_opts = parse_patched_options([]).ydl_opts diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index 1b7e251ee9..2fcdc06d77 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -44,7 +44,7 @@ def commit_lookup(cls): return { name: group for group, names in { - cls.PRIORITY: {''}, + cls.PRIORITY: {'priority'}, cls.CORE: { 'aes', 'cache', @@ -68,7 +68,7 @@ def commit_lookup(cls): 'misc', 'test', }, - cls.EXTRACTOR: {'extractor', 'extractors'}, + cls.EXTRACTOR: {'extractor'}, cls.DOWNLOADER: {'downloader'}, cls.POSTPROCESSOR: {'postprocessor'}, }.items() @@ -323,7 +323,7 @@ def apply_overrides(self, overrides): logger.debug(f'Ignored {when!r}, not in commits {self._start!r}') continue - override_hash = override.get('hash') + override_hash = override.get('hash') or when if override['action'] == 'add': commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) logger.info(f'ADD {commit}') @@ -337,7 +337,7 @@ def apply_overrides(self, overrides): elif override['action'] == 'change': if override_hash not in self._commits: continue - commit = Commit(override_hash, override['short'], override['authors']) + commit = Commit(override_hash, override['short'], override.get('authors') or []) logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') self._commits[commit.hash] = commit @@ -348,7 +348,7 @@ def groups(self): for commit in self: upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short) if upstream_re: - commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}' + commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}' match = self.MESSAGE_RE.fullmatch(commit.short) if not match: @@ -394,10 +394,10 @@ def details_from_prefix(prefix): return CommitGroup.CORE, None, () prefix, _, details = prefix.partition('/') - prefix = prefix.strip().lower() + prefix = prefix.strip() details = details.strip() - group = CommitGroup.get(prefix) + group = CommitGroup.get(prefix.lower()) if group is CommitGroup.PRIORITY: prefix, _, details = details.partition('/') diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index ccc9e36f34..05dd3ed412 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -668,7 +668,7 @@ def test(tmpl, expected, *, info=None, **params): for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected): if callable(expect): self.assertTrue(expect(got), f'Wrong {name} from {tmpl}') - else: + elif expect is not None: self.assertEqual(got, expect, f'Wrong {name} from {tmpl}') # Side-effects @@ -759,15 +759,17 @@ def expect_same_infodict(out): test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none') FORMATS = self.outtmpl_info['formats'] - sanitize = lambda x: x.replace(':', ':').replace('"', """).replace('\n', ' ') # Custom type casting test('%(formats.:.id)l', 'id 1, id 2, id 3') test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3')) test('%(ext)l', 'mp4') test('%(formats.:.id) 18l', ' id 1, id 2, id 3') - test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) - test('%(formats)#j', (json.dumps(FORMATS, indent=4), sanitize(json.dumps(FORMATS, indent=4)))) + test('%(formats)j', (json.dumps(FORMATS), None)) + test('%(formats)#j', ( + json.dumps(FORMATS, indent=4), + json.dumps(FORMATS, indent=4).replace(':', ':').replace('"', """).replace('\n', ' ') + )) test('%(title5).3B', 'á') test('%(title5)U', 'áéí 𝐀') test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀') @@ -792,8 +794,8 @@ def expect_same_infodict(out): test('%(title|%)s %(title|%%)s', '% %%') test('%(id+1-height+3)05d', '00158') test('%(width+100)05d', 'NA') - test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0])))) - test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0])))) + test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None)) + test('%(formats.0)r', (repr(FORMATS[0]), None)) test('%(height.0)03d', '001') test('%(-height.0)04d', '-001') test('%(formats.-1.id)s', FORMATS[-1]['id']) @@ -805,7 +807,7 @@ def expect_same_infodict(out): out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]} if 'height' in f else {'id': f['id']} for f in FORMATS]) - test('%(formats.:.{id,height.:2})j', (out, sanitize(out))) + test('%(formats.:.{id,height.:2})j', (out, None)) test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS)) test('%(.{id,title})j', ('{"id": "1234"}', '{"id": "1234"}')) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index b01477e6ff..e9682ddab0 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -12,28 +12,38 @@ from yt_dlp.jsinterp import JS_Undefined, JSInterpreter +class NaN: + pass + + class TestJSInterpreter(unittest.TestCase): - def _test(self, code, ret, func='f', args=()): - self.assertEqual(JSInterpreter(code).call_function(func, *args), ret) + def _test(self, jsi_or_code, expected, func='f', args=()): + if isinstance(jsi_or_code, str): + jsi_or_code = JSInterpreter(jsi_or_code) + got = jsi_or_code.call_function(func, *args) + if expected is NaN: + self.assertTrue(math.isnan(got), f'{got} is not NaN') + else: + self.assertEqual(got, expected) def test_basic(self): jsi = JSInterpreter('function f(){;}') self.assertEqual(repr(jsi.extract_function('f')), 'F') - self.assertEqual(jsi.call_function('f'), None) + self._test(jsi, None) self._test('function f(){return 42;}', 42) self._test('function f(){42}', None) self._test('var f = function(){return 42;}', 42) - def test_calc(self): - self._test('function f(a){return 2*a+1;}', 7, args=[3]) - def test_div(self): jsi = JSInterpreter('function f(a, b){return a / b;}') - self.assertTrue(math.isnan(jsi.call_function('f', 0, 0))) - self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1))) - self.assertTrue(math.isinf(jsi.call_function('f', 2, 0))) - self.assertEqual(jsi.call_function('f', 0, 3), 0) + self._test(jsi, NaN, args=(0, 0)) + self._test(jsi, NaN, args=(JS_Undefined, 1)) + self._test(jsi, float('inf'), args=(2, 0)) + self._test(jsi, 0, args=(0, 3)) + + def test_calc(self): + self._test('function f(a){return 2*a+1;}', 7, args=[3]) def test_empty_return(self): self._test('function f(){return; y()}', None) @@ -102,16 +112,15 @@ def test_precedence(self): ''', [20, 20, 30, 40, 50]) def test_builtins(self): - jsi = JSInterpreter('function f() { return NaN }') - self.assertTrue(math.isnan(jsi.call_function('f'))) + self._test('function f() { return NaN }', NaN) def test_date(self): self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000) jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }') - self.assertEqual(jsi.call_function('f', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000) - self.assertEqual(jsi.call_function('f', '12/31/1969 18:01:26 MDT'), 86000) # m/d/y - self.assertEqual(jsi.call_function('f', '1 January 1970 00:00:00 UTC'), 0) + self._test(jsi, 86000, args=['Wednesday 31 December 1969 18:01:26 MDT']) + self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT']) # m/d/y + self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC']) def test_call(self): jsi = JSInterpreter(''' @@ -119,8 +128,8 @@ def test_call(self): function y(a) { return x() + (a?a:0); } function z() { return y(3); } ''') - self.assertEqual(jsi.call_function('z'), 5) - self.assertEqual(jsi.call_function('y'), 2) + self._test(jsi, 5, func='z') + self._test(jsi, 2, func='y') def test_if(self): self._test(''' @@ -167,9 +176,9 @@ def test_switch(self): default:x=0; } return x } ''') - self.assertEqual(jsi.call_function('f', 1), 7) - self.assertEqual(jsi.call_function('f', 3), 6) - self.assertEqual(jsi.call_function('f', 5), 0) + self._test(jsi, 7, args=[1]) + self._test(jsi, 6, args=[3]) + self._test(jsi, 0, args=[5]) def test_switch_default(self): jsi = JSInterpreter(''' @@ -182,9 +191,9 @@ def test_switch_default(self): case 1: x+=1; } return x } ''') - self.assertEqual(jsi.call_function('f', 1), 2) - self.assertEqual(jsi.call_function('f', 5), 11) - self.assertEqual(jsi.call_function('f', 9), 14) + self._test(jsi, 2, args=[1]) + self._test(jsi, 11, args=[5]) + self._test(jsi, 14, args=[9]) def test_try(self): self._test('function f() { try{return 10} catch(e){return 5} }', 10) @@ -312,12 +321,12 @@ def test_replace(self): def test_char_code_at(self): jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}') - self.assertEqual(jsi.call_function('f', 0), 116) - self.assertEqual(jsi.call_function('f', 1), 101) - self.assertEqual(jsi.call_function('f', 2), 115) - self.assertEqual(jsi.call_function('f', 3), 116) - self.assertEqual(jsi.call_function('f', 4), None) - self.assertEqual(jsi.call_function('f', 'not_a_number'), 116) + self._test(jsi, 116, args=[0]) + self._test(jsi, 101, args=[1]) + self._test(jsi, 115, args=[2]) + self._test(jsi, 116, args=[3]) + self._test(jsi, None, args=[4]) + self._test(jsi, 116, args=['not_a_number']) def test_bitwise_operators_overflow(self): self._test('function f(){return -524999584 << 5}', 379882496) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 6759d2c467..811f70e689 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -67,7 +67,7 @@ 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', - ) + ), ] _NSIG_TESTS = [ diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e51bceef34..7a5e593232 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -259,7 +259,7 @@ class YoutubeDL: consoletitle: Display progress in console window's titlebar. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file - clean_infojson: Remove private fields from the infojson + clean_infojson: Remove internal metadata from the infojson getcomments: Extract video comments. This will not be written to disk unless writeinfojson is also given writeannotations: Write the video annotations to a .annotations.xml file @@ -1902,7 +1902,7 @@ def __process_playlist(self, ie_result, download): continue entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip') - if not lazy and 'playlist-index' in self.params.get('compat_opts', []): + if not lazy and 'playlist-index' in self.params['compat_opts']: playlist_index = ie_result['requested_entries'][i] entry_copy = collections.ChainMap(entry, { @@ -2959,8 +2959,7 @@ def print_field(field, actual_field=None, optional=False): print_field('url', 'urls') print_field('thumbnail', optional=True) print_field('description', optional=True) - if filename: - print_field('filename') + print_field('filename') if self.params.get('forceduration') and info_copy.get('duration') is not None: self.to_stdout(formatSeconds(info_copy['duration'])) print_field('format') @@ -3185,7 +3184,6 @@ def existing_video_file(*filepaths): return if info_dict.get('requested_formats') is not None: - requested_formats = info_dict['requested_formats'] old_ext = info_dict['ext'] if self.params.get('merge_output_format') is None: if (info_dict['ext'] == 'webm' @@ -3212,6 +3210,7 @@ def correct_ext(filename, ext=new_ext): full_filename = correct_ext(full_filename) temp_filename = correct_ext(temp_filename) dl_filename = existing_video_file(full_filename, temp_filename) + info_dict['__real_download'] = False merger = FFmpegMergerPP(self) @@ -3219,12 +3218,12 @@ def correct_ext(filename, ext=new_ext): if dl_filename is not None: self.report_file_already_downloaded(dl_filename) elif fd: - for f in requested_formats if fd != FFmpegFD else []: + for f in info_dict['requested_formats'] if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), 'f%s' % f['format_id'], info_dict['ext']) downloaded.append(fname) - info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) + info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: @@ -3248,7 +3247,7 @@ def correct_ext(filename, ext=new_ext): f'You have requested downloading multiple formats to stdout {reason}. ' 'The formats will be streamed one after the other') fname = temp_filename - for f in requested_formats: + for f in info_dict['requested_formats']: new_info = dict(info_dict) del new_info['requested_formats'] new_info.update(f) @@ -4109,8 +4108,11 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename except network_exceptions as err: + if isinstance(err, urllib.error.HTTPError) and err.code == 404: + self.to_screen(f'[info] {thumb_display_id.title()} does not exist') + else: + self.report_warning(f'Unable to download {thumb_display_id}: {err}') thumbnails.pop(idx) - self.report_warning(f'Unable to download {thumb_display_id}: {err}') if ret and not write_all: break return ret diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 8693e0b4ad..f21e4f7e7b 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1326,3 +1326,7 @@ def get_cookie_header(self, url): cookie_req = urllib.request.Request(escape_url(sanitize_url(url))) self.add_cookie_header(cookie_req) return cookie_req.get_header('Cookie') + + def clear(self, *args, **kwargs): + with contextlib.suppress(KeyError): + return super().clear(*args, **kwargs) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 477ec3c8a0..a0219a3509 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -49,7 +49,6 @@ class FileDownloader: verbose: Print additional info to stdout. quiet: Do not print messages to stdout. ratelimit: Download speed limit, in bytes/sec. - continuedl: Attempt to continue downloads if possible throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) retries: Number of times to retry for expected network errors. Default is 0 for API, but 10 for CLI diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index cfe7397845..7d8575c2a4 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -7,9 +7,9 @@ from .external import FFmpegFD from ..utils import ( DownloadError, - str_or_none, - sanitized_Request, WebSocketsWrapper, + sanitized_Request, + str_or_none, try_get, ) diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 0fcf022820..40430505d6 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -49,7 +49,7 @@ def _real_extract(self, url): 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) - if urlh.status == 403: + if urlh.getcode() == 403: if stream['code'] == 53004: self.raise_login_required() if stream['code'] == 53005: @@ -59,7 +59,7 @@ def _real_extract(self, url): 'This video is protected by a password, use the --video-password option', expected=True) raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True) - if urlh.status == 429: + if urlh.getcode() == 429: self.raise_login_required( f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and', method='cookies') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f11a673583..9662a7ee1c 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -17,6 +17,7 @@ import sys import time import types +import urllib.error import urllib.parse import urllib.request import xml.etree.ElementTree @@ -58,6 +59,7 @@ join_nonempty, js_to_json, mimetype2ext, + netrc_from_content, network_exceptions, orderedSet, parse_bitrate, @@ -72,7 +74,6 @@ smuggle_url, str_or_none, str_to_int, - netrc_from_content, strip_or_none, traverse_obj, truncate_string, diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py old mode 100755 new mode 100644 diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py old mode 100755 new mode 100644 diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 0d0ad0bb86..e63714e846 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -238,10 +238,8 @@ class OdnoklassnikiIE(InfoExtractor): def _clear_cookies(self, cdn_url): # Direct http downloads will fail if CDN cookies are set # so we need to reset them after each format extraction - if self._get_cookies('https://notarealsubdomain.mycdn.me/'): - self.cookiejar.clear(domain='.mycdn.me') - if self._get_cookies(cdn_url): - self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname) + self.cookiejar.clear(domain='.mycdn.me') + self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname) @classmethod def _extract_embed_urls(cls, url, webpage): diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index 2aa0dd870a..c686044fa2 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -488,9 +488,9 @@ def _call_api(self, resource, video_id, query={}, **kwargs): f'{self._API_BASE_URL}/{resource}', video_id, query={'lang': 'pl', 'platform': 'BROWSER', **query}, expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs) - if is_valid(urlh.status): + if is_valid(urlh.getcode()): return document - raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})') + raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.getcode()})') def _parse_video(self, video, with_url=True): info_dict = traverse_obj(video, { diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 770aa284da..23e1aaf202 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -39,7 +39,7 @@ def is_logged_in(): login_post, login_post_urlh = self._download_webpage_handle( self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401]) - if login_post_urlh.status == 401: + if login_post_urlh.getcode() == 401: if get_element_by_class('onboarding-content-register-popup__title', login_post): raise ExtractorError( 'Unable to log in: The provided email has not registered yet.', expected=True) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4daa4f50e9..11e47904a5 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -811,7 +811,7 @@ def _extract_badges(self, badge_list: list): 'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM, 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW, 'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED, - 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED + 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED, } label_map = { @@ -821,7 +821,7 @@ def _extract_badges(self, badge_list: list): 'live': BadgeType.LIVE_NOW, 'premium': BadgeType.AVAILABILITY_PREMIUM, 'verified': BadgeType.VERIFIED, - 'official artist channel': BadgeType.VERIFIED + 'official artist channel': BadgeType.VERIFIED, } badges = [] @@ -3935,7 +3935,7 @@ def process_manifest_format(f, proto, client_name, itag): f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1)) if f['quality'] == -1 and f.get('height'): f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))]) - if self.get_param('verbose'): + if self.get_param('verbose') or all_formats: f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ') if f.get('fps') and f['fps'] <= 1: del f['fps'] @@ -4531,7 +4531,7 @@ def process_language(container, base_url, lang_code, sub_name, query): and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) ): upload_date = strftime_or_none( - self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date + self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date info['upload_date'] = upload_date for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: @@ -5071,7 +5071,7 @@ def _get_uncropped(url): last_updated_unix = self._parse_time_text( self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text'))) - info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d') + info['modified_date'] = strftime_or_none(last_updated_unix) info['view_count'] = self._get_count(playlist_stats, 1) if info['view_count'] is None: # 0 is allowed diff --git a/yt_dlp/options.py b/yt_dlp/options.py index b174a24af7..9d6dbec9fc 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1414,8 +1414,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): '--clean-info-json', '--clean-infojson', action='store_true', dest='clean_infojson', default=None, help=( - 'Remove some private fields such as filenames from the infojson. ' - 'Note that it could still contain some personal information (default)')) + 'Remove some internal metadata such as filenames from the infojson (default)')) filesystem.add_option( '--no-clean-info-json', '--no-clean-infojson', action='store_false', dest='clean_infojson', @@ -1678,8 +1677,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): 'Execute a command, optionally prefixed with when to execute it, separated by a ":". ' 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). ' 'Same syntax as the output template can be used to pass any field as arguments to the command. ' - 'After download, an additional field "filepath" that contains the final path of the downloaded file ' - 'is also available, and if no fields are passed, %(filepath,_filename|)q is appended to the end of the command. ' + 'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. ' 'This option can be used multiple times')) postproc.add_option( '--no-exec', diff --git a/yt_dlp/utils/_legacy.py b/yt_dlp/utils/_legacy.py index 1097778f0f..96ac468b1f 100644 --- a/yt_dlp/utils/_legacy.py +++ b/yt_dlp/utils/_legacy.py @@ -6,7 +6,7 @@ import urllib.parse import zlib -from ._utils import decode_base_n, preferredencoding +from ._utils import Popen, decode_base_n, preferredencoding from .traversal import traverse_obj from ..dependencies import certifi, websockets @@ -174,3 +174,7 @@ def handle_youtubedl_headers(headers): del filtered_headers['Youtubedl-no-compression'] return filtered_headers + + +def process_communicate_or_kill(p, *args, **kwargs): + return Popen.communicate_or_kill(p, *args, **kwargs) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 28c2785cb0..bc1bc9116c 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -872,12 +872,6 @@ def __init__(self, content): self._parse('-', stream, False) -def process_communicate_or_kill(p, *args, **kwargs): - deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed ' - f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead') - return Popen.communicate_or_kill(p, *args, **kwargs) - - class Popen(subprocess.Popen): if sys.platform == 'win32': _startupinfo = subprocess.STARTUPINFO() @@ -1662,7 +1656,7 @@ def unified_strdate(date_str, day_first=True): def unified_timestamp(date_str, day_first=True): - if date_str is None: + if not isinstance(date_str, str): return None date_str = re.sub(r'\s+', ' ', re.sub( @@ -2454,7 +2448,7 @@ def request_to_url(req): return req -def strftime_or_none(timestamp, date_format, default=None): +def strftime_or_none(timestamp, date_format='%Y%m%d', default=None): datetime_object = None try: if isinstance(timestamp, (int, float)): # unix timestamp