From 0db3d0892e2c3a54ea5927bdeb97f1dcc03166d1 Mon Sep 17 00:00:00 2001
From: Pablo Galindo Salgado
Date: Sun, 3 Aug 2025 17:10:51 +0100
Subject: [PATCH 1/2] gh-137314: Fix incorrect treatment of format specs in
 raw fstrings (GH-137328)

(cherry picked from commit 0153d82a5ab0c6ac16c046bdd4438ea11b58d59d)

Co-authored-by: Pablo Galindo Salgado
---
 Lib/test/test_fstring.py                      | 28 +++++++++++++++++++
 ...-08-02-23-04-57.gh-issue-137314.wjEdzD.rst |  5 ++++
 Parser/action_helpers.c                       | 10 ++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index c2ab603a8a775c..5e743d34d2a573 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1821,6 +1821,34 @@ def test_newlines_in_format_specifiers(self):
         for case in valid_cases:
             compile(case, "<string>", "exec")
 
+    def test_raw_fstring_format_spec(self):
+        # Test raw f-string format spec behavior (Issue #137314).
+        #
+        # Raw f-strings should preserve literal backslashes in format specifications,
+        # not interpret them as escape sequences.
+        class UnchangedFormat:
+            """Test helper that returns the format spec unchanged."""
+            def __format__(self, format):
+                return format
+
+        # Test basic escape sequences
+        self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ')
+        self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF')
+
+        # Test nested expressions with raw/non-raw combinations
+        self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ')
+        self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+        self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
+
+        # Test continuation character in format specs
+        self.assertEqual(f"""{UnchangedFormat():{'a'\
+        'b'}}""", 'ab')
+        self.assertEqual(rf"""{UnchangedFormat():{'a'\
+        'b'}}""", 'ab')
+
+        # Test multiple format specs in same raw f-string
+        self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst
new file mode 100644
index 00000000000000..09d0c3e68fc1ed
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst
@@ -0,0 +1,5 @@
+Fixed a regression where raw f-strings incorrectly interpreted
+escape sequences in format specifications. Raw f-strings now properly preserve
+literal backslashes in format specs, matching the behavior from Python 3.11.
+For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead of
+``'ÿ'``. Patch by Pablo Galindo.
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 99dd0976c6137d..6a8aa4ef044f8e 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -1369,7 +1369,15 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
     if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
         return NULL;
     }
-    PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);
+
+    // Check if we're inside a raw f-string for format spec decoding
+    int is_raw = 0;
+    if (INSIDE_FSTRING(p->tok)) {
+        tokenizer_mode *mode = TOK_GET_MODE(p->tok);
+        is_raw = mode->raw;
+    }
+
+    PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok);
     if (str == NULL) {
         return NULL;
     }

From f0e342304b6ce6a29e2d03d8ca73081957166bde Mon Sep 17 00:00:00 2001
From: Pablo Galindo
Date: Sun, 3 Aug 2025 17:35:45 +0100
Subject: [PATCH 2/2] fixup! gh-137314: Fix incorrect treatment of format
 specs in raw fstrings (GH-137328)

(cherry picked from commit 0153d82a5ab0c6ac16c046bdd4438ea11b58d59d)
---
 Parser/action_helpers.c |  3 ++-
 Parser/lexer/lexer.c    | 16 ----------------
 Parser/lexer/state.h    | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 6a8aa4ef044f8e..6f841ef2e4f9c3 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -1,6 +1,7 @@
 #include <Python.h>
 
 #include "pegen.h"
+#include "lexer/state.h"
 #include "string_parser.h"
 #include "pycore_runtime.h"         // _PyRuntime
 #include "pycore_pystate.h"         // _PyInterpreterState_GET()
@@ -1374,7 +1375,7 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
     int is_raw = 0;
     if (INSIDE_FSTRING(p->tok)) {
         tokenizer_mode *mode = TOK_GET_MODE(p->tok);
-        is_raw = mode->raw;
+        is_raw = mode->f_string_raw;
     }
 
     PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok);
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index d29b0bbd25d2ab..384239bd414c38 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -22,22 +22,6 @@
         || c == '_'\
         || (c >= 128))
 
-#ifdef Py_DEBUG
-static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
-    assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
-    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
-}
-static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
-    assert(tok->tok_mode_stack_index >= 0);
-    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
-    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
-}
-#else
-#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
-#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
-#endif
-
 #define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
 #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                 _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h
index 9ed3babfdbfbf1..d8791d89dd02db 100644
--- a/Parser/lexer/state.h
+++ b/Parser/lexer/state.h
@@ -1,6 +1,7 @@
 #ifndef _PY_LEXER_H_
 #define _PY_LEXER_H_
 
+#include "Python.h"
 #include "object.h"
 
 #define MAXINDENT 100       /* Max indentation level */
@@ -138,5 +139,20 @@
 void _PyTokenizer_Free(struct tok_state *);
 void _PyToken_Free(struct token *);
 void _PyToken_Init(struct token *);
 
+#ifdef Py_DEBUG
+static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
+}
+static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
+    assert(tok->tok_mode_stack_index >= 0);
+    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
+    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
+}
+#else
+#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
+#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
+#endif
 #endif
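The snippet below is an illustrative sketch distilled from the new test, not part of the patch itself. It shows the behavior the fix restores on an interpreter that includes this change; UnchangedFormat is a throwaway helper that simply echoes back the format spec it receives.

    class UnchangedFormat:
        """Helper that returns the format spec unchanged so it can be inspected."""
        def __format__(self, spec):
            return spec

    # A non-raw f-string decodes the escape sequence in the format spec...
    assert f"{UnchangedFormat():\xFF}" == '\xff'       # 'ÿ'
    # ...while a raw f-string once again keeps the backslash literal (gh-137314).
    assert rf"{UnchangedFormat():\xFF}" == '\\xFF'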
