From 00cdda4f6fe18712ced13dbc64b7ea10f323e268 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 24 Dec 2023 22:09:01 +0100 Subject: [PATCH] [core] Fix format selection parse error for CPython 3.12 (#8797) Authored by: Grub4K --- test/test_YoutubeDL.py | 2 ++ yt_dlp/YoutubeDL.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 48c710e00..8bff08314 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -140,6 +140,8 @@ def test(inp, *expected, multi=False): test('example-with-dashes', 'example-with-dashes') test('all', '2', '47', '45', 'example-with-dashes', '35') test('mergeall', '2+47+45+example-with-dashes+35', multi=True) + # See: https://github.com/yt-dlp/yt-dlp/pulls/8797 + test('7_a/worst', '35') def test_format_selection_audio(self): formats = [ diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0c07866e4..5e28fd0e2 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2465,9 +2465,16 @@ def final_selector(ctx): return selector_function(ctx_copy) return final_selector - stream = io.BytesIO(format_spec.encode()) + # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid + # Prefix numbers with random letters to avoid it being classified as a number + # See: https://github.com/yt-dlp/yt-dlp/pulls/8797 + # TODO: Implement parser not reliant on tokenize.tokenize + prefix = ''.join(random.choices(string.ascii_letters, k=32)) + stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode()) try: - tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline))) + tokens = list(_remove_unused_ops( + token._replace(string=token.string.replace(prefix, '')) + for token in tokenize.tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))