Skip to content

Commit ef66fb5

Browse files
authored
pythongh-135148: Correctly handle f/t strings with comments and debug expressions (python#135198)
1 parent e89923d commit ef66fb5

File tree

3 files changed

+83
-20
lines changed

3 files changed

+83
-20
lines changed

Lib/test/test_fstring.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,18 @@ def __repr__(self):
16511651
self.assertEqual(f"{1+2 = # my comment
16521652
}", '1+2 = \n 3')
16531653

1654+
self.assertEqual(f'{""" # booo
1655+
"""=}', '""" # booo\n """=\' # booo\\n \'')
1656+
1657+
self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
1658+
self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
1659+
1660+
self.assertEqual(f'{ # some comment goes here
1661+
"""hello"""=}', ' \n """hello"""=\'hello\'')
1662+
self.assertEqual(f'{"""# this is not a comment
1663+
a""" # this is a comment
1664+
}', '# this is not a comment\n a')
1665+
16541666
# These next lines contain tabs. Backslash escapes don't
16551667
# work in f-strings.
16561668
# patchcheck doesn't like these tabs. So the only way to test
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a bug where f-string debug expressions (using =) would incorrectly
2+
strip out parts of strings containing escaped quotes and # characters. Patch
3+
by Pablo Galindo.

Parser/lexer/lexer.c

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
121121
}
122122
PyObject *res = NULL;
123123

124-
// Check if there is a # character in the expression
124+
// Look for a # character outside of string literals
125125
int hash_detected = 0;
126+
int in_string = 0;
127+
char quote_char = 0;
128+
126129
for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
127-
if (tok_mode->last_expr_buffer[i] == '#') {
130+
char ch = tok_mode->last_expr_buffer[i];
131+
132+
// Skip escaped characters
133+
if (ch == '\\') {
134+
i++;
135+
continue;
136+
}
137+
138+
// Handle quotes
139+
if (ch == '"' || ch == '\'') {
140+
// The following if/else block works because there is an odd number
141+
// of quotes in STRING tokens and the lexer only ever reaches this
142+
// function with valid STRING tokens.
143+
// For example: """hello"""
144+
// First quote: in_string = 1
145+
// Second quote: in_string = 0
146+
// Third quote: in_string = 1
147+
if (!in_string) {
148+
in_string = 1;
149+
quote_char = ch;
150+
}
151+
else if (ch == quote_char) {
152+
in_string = 0;
153+
}
154+
continue;
155+
}
156+
157+
// Check for # outside strings
158+
if (ch == '#' && !in_string) {
128159
hash_detected = 1;
129160
break;
130161
}
131162
}
132-
163+
// If we found a # character in the expression, we need to handle comments
133164
if (hash_detected) {
134-
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
135-
char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
165+
// Allocate buffer for processed result
166+
char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
136167
if (!result) {
137168
return -1;
138169
}
139170

140-
Py_ssize_t i = 0;
141-
Py_ssize_t j = 0;
171+
Py_ssize_t i = 0; // Input position
172+
Py_ssize_t j = 0; // Output position
173+
in_string = 0; // Whether we're in a string
174+
quote_char = 0; // Current string quote char
142175

143-
for (i = 0, j = 0; i < input_length; i++) {
144-
if (tok_mode->last_expr_buffer[i] == '#') {
145-
// Skip characters until newline or end of string
146-
while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
147-
if (tok_mode->last_expr_buffer[i] == '\n') {
148-
result[j++] = tok_mode->last_expr_buffer[i];
149-
break;
150-
}
176+
// Process each character
177+
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178+
char ch = tok_mode->last_expr_buffer[i];
179+
180+
// Handle string quotes
181+
if (ch == '"' || ch == '\'') {
182+
// See comment above to understand this part
183+
if (!in_string) {
184+
in_string = 1;
185+
quote_char = ch;
186+
} else if (ch == quote_char) {
187+
in_string = 0;
188+
}
189+
result[j++] = ch;
190+
}
191+
// Skip comments
192+
else if (ch == '#' && !in_string) {
193+
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194+
tok_mode->last_expr_buffer[i] != '\n') {
151195
i++;
152196
}
153-
} else {
154-
result[j++] = tok_mode->last_expr_buffer[i];
197+
if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198+
result[j++] = '\n';
199+
}
200+
}
201+
// Copy other chars
202+
else {
203+
result[j++] = ch;
155204
}
205+
i++;
156206
}
157207

158208
result[j] = '\0'; // Null-terminate the result string
@@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
164214
tok_mode->last_expr_size - tok_mode->last_expr_end,
165215
NULL
166216
);
167-
168217
}
169218

170-
171-
if (!res) {
219+
if (!res) {
172220
return -1;
173221
}
174222
token->metadata = res;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy