diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index dd58e032a8befe..89d425d6e27aa7 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1651,6 +1651,18 @@ def __repr__(self): self.assertEqual(f"{1+2 = # my comment }", '1+2 = \n 3') + self.assertEqual(f'{""" # booo + """=}', '""" # booo\n """=\' # booo\\n \'') + + self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'') + self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'') + + self.assertEqual(f'{ # some comment goes here + """hello"""=}', ' \n """hello"""=\'hello\'') + self.assertEqual(f'{"""# this is not a comment + a""" # this is a comment + }', '# this is not a comment\n a') + # These next lines contains tabs. Backslash escapes don't # work in f-strings. # patchcheck doesn't like these tabs. So the only way to test diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst new file mode 100644 index 00000000000000..9b1f62433b45ed --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst @@ -0,0 +1,3 @@ +Fixed a bug where f-string debug expressions (using =) would incorrectly +strip out parts of strings containing escaped quotes and # characters. Patch +by Pablo Galindo. diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 4d10bccf0a53f2..e71d7391d8a439 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { } PyObject *res = NULL; - // Check if there is a # character in the expression + // Look for a # character outside of string literals int hash_detected = 0; + int in_string = 0; + char quote_char = 0; + for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { + char ch = tok_mode->last_expr_buffer[i]; + + // Skip escaped characters + if (ch == '\\') { + i++; + continue; + } + + // Handle quotes + if (ch == '"' || ch == '\'') { + // The following if/else block works becase there is an off number + // of quotes in STRING tokens and the lexer only ever reaches this + // function with valid STRING tokens. + // For example: """hello""" + // First quote: in_string = 1 + // Second quote: in_string = 0 + // Third quote: in_string = 1 + if (!in_string) { + in_string = 1; + quote_char = ch; + } + else if (ch == quote_char) { + in_string = 0; + } + continue; + } + + // Check for # outside strings + if (ch == '#' && !in_string) { hash_detected = 1; break; } } - + // If we found a # character in the expression, we need to handle comments if (hash_detected) { - Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char)); + // Allocate buffer for processed result + char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char)); if (!result) { return -1; } - Py_ssize_t i = 0; - Py_ssize_t j = 0; + Py_ssize_t i = 0; // Input position + Py_ssize_t j = 0; // Output position + in_string = 0; // Whether we're in a string + quote_char = 0; // Current string quote char - for (i = 0, j = 0; i < input_length; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { - // Skip characters until newline or end of string - while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') { - if (tok_mode->last_expr_buffer[i] == '\n') { - result[j++] = tok_mode->last_expr_buffer[i]; - break; - } + // Process each character + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + char ch = tok_mode->last_expr_buffer[i]; + + // Handle string quotes + if (ch == '"' || ch == '\'') { + // See comment above to understand this part + if (!in_string) { + in_string = 1; + quote_char = ch; + } else if (ch == quote_char) { + in_string = 0; + } + result[j++] = ch; + } + // Skip comments + else if (ch == '#' && !in_string) { + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end && + tok_mode->last_expr_buffer[i] != '\n') { i++; } - } else { - result[j++] = tok_mode->last_expr_buffer[i]; + if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + result[j++] = '\n'; + } + } + // Copy other chars + else { + result[j++] = ch; } + i++; } result[j] = '\0'; // Null-terminate the result string @@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { tok_mode->last_expr_size - tok_mode->last_expr_end, NULL ); - } - - if (!res) { + if (!res) { return -1; } token->metadata = res; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy