diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index c2ab603a8a775c..5e743d34d2a573 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1821,6 +1821,34 @@ def test_newlines_in_format_specifiers(self): for case in valid_cases: compile(case, "", "exec") + def test_raw_fstring_format_spec(self): + # Test raw f-string format spec behavior (Issue #137314). + # + # Raw f-strings should preserve literal backslashes in format specifications, + # not interpret them as escape sequences. + class UnchangedFormat: + """Test helper that returns the format spec unchanged.""" + def __format__(self, format): + return format + + # Test basic escape sequences + self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ') + self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF') + + # Test nested expressions with raw/non-raw combinations + self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ') + self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF') + self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF') + + # Test continuation character in format specs + self.assertEqual(f"""{UnchangedFormat():{'a'\ + 'b'}}""", 'ab') + self.assertEqual(rf"""{UnchangedFormat():{'a'\ + 'b'}}""", 'ab') + + # Test multiple format specs in same raw f-string + self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n') + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst new file mode 100644 index 00000000000000..09d0c3e68fc1ed --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst @@ -0,0 +1,5 @@ +Fixed a regression where raw f-strings incorrectly interpreted +escape sequences in format specifications. Raw f-strings now properly preserve +literal backslashes in format specs, matching the behavior from Python 3.11. +For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead of +``'ÿ'``. Patch by Pablo Galindo. diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 99dd0976c6137d..6f841ef2e4f9c3 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1,6 +1,7 @@ #include #include "pegen.h" +#include "lexer/state.h" #include "string_parser.h" #include "pycore_runtime.h" // _PyRuntime #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -1369,7 +1370,15 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) { if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) { return NULL; } - PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok); + + // Check if we're inside a raw f-string for format spec decoding + int is_raw = 0; + if (INSIDE_FSTRING(p->tok)) { + tokenizer_mode *mode = TOK_GET_MODE(p->tok); + is_raw = mode->f_string_raw; + } + + PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok); if (str == NULL) { return NULL; } diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index d29b0bbd25d2ab..384239bd414c38 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -22,22 +22,6 @@ || c == '_'\ || (c >= 128)) -#ifdef Py_DEBUG -static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) { - assert(tok->tok_mode_stack_index >= 0); - assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL); - return &(tok->tok_mode_stack[tok->tok_mode_stack_index]); -} -static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) { - assert(tok->tok_mode_stack_index >= 0); - assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL); - return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]); -} -#else -#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index])) -#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index])) -#endif - #define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end) #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\ _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end)) diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h index 9ed3babfdbfbf1..d8791d89dd02db 100644 --- a/Parser/lexer/state.h +++ b/Parser/lexer/state.h @@ -1,6 +1,7 @@ #ifndef _PY_LEXER_H_ #define _PY_LEXER_H_ +#include "Python.h" #include "object.h" #define MAXINDENT 100 /* Max indentation level */ @@ -138,5 +139,20 @@ void _PyTokenizer_Free(struct tok_state *); void _PyToken_Free(struct token *); void _PyToken_Init(struct token *); +#ifdef Py_DEBUG +static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) { + assert(tok->tok_mode_stack_index >= 0); + assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL); + return &(tok->tok_mode_stack[tok->tok_mode_stack_index]); +} +static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) { + assert(tok->tok_mode_stack_index >= 0); + assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL); + return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]); +} +#else +#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index])) +#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index])) +#endif #endif pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy