[3.13] gh-137314: Fix incorrect treatment of format specs in raw fstrings (GH-137328) #137345

Merged 2 commits on Aug 3, 2025
28 changes: 28 additions & 0 deletions Lib/test/test_fstring.py
@@ -1821,6 +1821,34 @@ def test_newlines_in_format_specifiers(self):
        for case in valid_cases:
            compile(case, "<string>", "exec")

    def test_raw_fstring_format_spec(self):
        # Test raw f-string format spec behavior (Issue #137314).
        #
        # Raw f-strings should preserve literal backslashes in format specifications,
        # not interpret them as escape sequences.
        class UnchangedFormat:
            """Test helper that returns the format spec unchanged."""
            def __format__(self, format):
                return format

        # Test basic escape sequences
        self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ')
        self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF')

        # Test nested expressions with raw/non-raw combinations
        self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ')
        self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
        self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF')

        # Test continuation character in format specs
        self.assertEqual(f"""{UnchangedFormat():{'a'\
'b'}}""", 'ab')
        self.assertEqual(rf"""{UnchangedFormat():{'a'\
'b'}}""", 'ab')

        # Test multiple format specs in same raw f-string
        self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n')


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,5 @@
Fixed a regression where raw f-strings incorrectly interpreted
escape sequences in format specifications. Raw f-strings now properly preserve
literal backslashes in format specs, matching the behavior from Python 3.11.
For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead of
``'ÿ'``. Patch by Pablo Galindo.
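As a quick illustration of the documented behavior, the snippet below uses a throwaway ShowSpec helper (an example class written for this note, not part of CPython or of this patch) to echo back the format spec an object receives:

# Example helper: returns whatever format spec it is given.
class ShowSpec:
    def __format__(self, spec):
        return spec

# Non-raw f-string: the \xFF escape in the format spec is decoded
# before __format__ ever sees it.
assert f"{ShowSpec():\xFF}" == 'ÿ'

# Raw f-string (with this fix): the backslash stays literal, as in 3.11.
assert rf"{ShowSpec():\xFF}" == '\\xFF'

The non-raw case hands __format__ the single character 'ÿ', while the raw case passes the four characters backslash, x, F, F through untouched.
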
11 changes: 10 additions & 1 deletion Parser/action_helpers.c
@@ -1,6 +1,7 @@
#include <Python.h>

#include "pegen.h"
#include "lexer/state.h"
#include "string_parser.h"
#include "pycore_runtime.h" // _PyRuntime
#include "pycore_pystate.h" // _PyInterpreterState_GET()
@@ -1369,7 +1370,15 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
    if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
        return NULL;
    }
    PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);

    // Check if we're inside a raw f-string for format spec decoding
    int is_raw = 0;
    if (INSIDE_FSTRING(p->tok)) {
        tokenizer_mode *mode = TOK_GET_MODE(p->tok);
        is_raw = mode->f_string_raw;
    }

    PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok);
    if (str == NULL) {
        return NULL;
    }
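
The rule this change implements can be sketched in pure Python. The sketch below is only an approximation for illustration (it assumes format-spec text follows the same escape rules as ordinary string literals, and decode_format_spec is a hypothetical helper); it is not CPython's _PyPegen_decode_string:

# Rough sketch (not CPython code): decode escapes in a format spec only
# when the enclosing f-string is not raw.
def decode_format_spec(spec: str, fstring_is_raw: bool) -> str:
    if fstring_is_raw:
        return spec  # keep backslashes literal, e.g. r"\xFF" stays "\xFF"
    # Approximate escape processing for the non-raw case.
    return spec.encode("latin-1", "backslashreplace").decode("unicode_escape")

assert decode_format_spec(r"\xFF", fstring_is_raw=True) == '\\xFF'
assert decode_format_spec(r"\xFF", fstring_is_raw=False) == 'ÿ'

In the C code above, the raw flag is read from the innermost entry of the tokenizer's f-string mode stack via TOK_GET_MODE, which is why those helpers move from Parser/lexer/lexer.c into Parser/lexer/state.h in the files below.
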
16 changes: 0 additions & 16 deletions Parser/lexer/lexer.c
@@ -22,22 +22,6 @@
|| c == '_'\
|| (c >= 128))

#ifdef Py_DEBUG
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
}
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
}
#else
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
#endif

#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
16 changes: 16 additions & 0 deletions Parser/lexer/state.h
@@ -1,6 +1,7 @@
#ifndef _PY_LEXER_H_
#define _PY_LEXER_H_

#include "Python.h"
#include "object.h"

#define MAXINDENT 100 /* Max indentation level */
@@ -138,5 +139,20 @@ void _PyTokenizer_Free(struct tok_state *);
void _PyToken_Free(struct token *);
void _PyToken_Init(struct token *);

#ifdef Py_DEBUG
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
}
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
}
#else
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
#endif

#endif