Skip to content

Commit 5f8456e

Browse files
committed
gh-135148: Correctly handle f/t strings with comments and debug expressions
1 parent 1ffe913 commit 5f8456e

File tree

2 files changed

+78
-13
lines changed

2 files changed

+78
-13
lines changed

Lib/test/test_fstring.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,12 @@ def __repr__(self):
16511651
self.assertEqual(f"{1+2 = # my comment
16521652
}", '1+2 = \n 3')
16531653

1654+
self.assertEqual(f'{""" # booo
1655+
"""=}', '""" # booo\n """=\' # booo\\n \'')
1656+
1657+
self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
1658+
self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
1659+
16541660
# These next lines contain tabs. Backslash escapes don't
16551661
# work in f-strings.
16561662
# patchcheck doesn't like these tabs. So the only way to test

Parser/lexer/lexer.c

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -123,35 +123,96 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
123123

124124
// Check if there is a # character in the expression
125125
int hash_detected = 0;
126+
int in_string = 0;
127+
char string_quote = 0;
126128
for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
127-
if (tok_mode->last_expr_buffer[i] == '#') {
129+
char ch = tok_mode->last_expr_buffer[i];
130+
if (ch == '\\' && i + 1 < tok_mode->last_expr_size - tok_mode->last_expr_end) {
131+
// Skip the next character if it's an escape sequence
132+
i++;
133+
continue;
134+
}
135+
if (ch == '"' || ch == '\'') {
136+
if (!in_string) {
137+
in_string = 1;
138+
string_quote = ch;
139+
} else if (ch == string_quote) {
140+
// Check for triple quotes
141+
if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
142+
i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
143+
// Skip the rest of the triple quote
144+
i += 2;
145+
}
146+
in_string = 0;
147+
}
148+
} else if (ch == '#' && !in_string) {
128149
hash_detected = 1;
129150
break;
130151
}
131152
}
132-
153+
// If we found a # character in the expression, we need to handle comments
133154
if (hash_detected) {
155+
// Calculate length of input we need to process
134156
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
157+
158+
// Allocate buffer for processed result, with room for null terminator
135159
char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
136160
if (!result) {
137161
return -1;
138162
}
139163

140-
Py_ssize_t i = 0;
141-
Py_ssize_t j = 0;
164+
// Initialize counters and state
165+
Py_ssize_t i = 0; // Input position
166+
Py_ssize_t j = 0; // Output position
167+
in_string = 0; // Whether we're currently inside a string
168+
string_quote = 0; // The quote character for current string (' or ")
142169

170+
// Process each character of input
143171
for (i = 0, j = 0; i < input_length; i++) {
144-
if (tok_mode->last_expr_buffer[i] == '#') {
145-
// Skip characters until newline or end of string
172+
char ch = tok_mode->last_expr_buffer[i];
173+
174+
// Handle escape sequences - copy both backslash and next char
175+
if (ch == '\\' && i + 1 < input_length) {
176+
result[j++] = ch; // Copy backslash
177+
result[j++] = tok_mode->last_expr_buffer[++i]; // Copy escaped char
178+
continue;
179+
}
180+
181+
// Handle string quotes
182+
if (ch == '"' || ch == '\'') {
183+
if (!in_string) {
184+
// Start of new string
185+
in_string = 1;
186+
string_quote = ch;
187+
} else if (ch == string_quote) {
188+
// Potential end of string - check for triple quotes
189+
if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
190+
i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
191+
// Found triple quote - copy all three quotes
192+
result[j++] = ch;
193+
result[j++] = ch;
194+
result[j++] = ch;
195+
i += 2; // Skip the other two quotes
196+
continue;
197+
}
198+
// End of regular string
199+
in_string = 0;
200+
}
201+
result[j++] = ch; // Copy the quote character
202+
}
203+
// Handle comments - skip everything until newline
204+
else if (ch == '#' && !in_string) {
146205
while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
147206
if (tok_mode->last_expr_buffer[i] == '\n') {
148-
result[j++] = tok_mode->last_expr_buffer[i];
207+
result[j++] = tok_mode->last_expr_buffer[i]; // Keep newline
149208
break;
150209
}
151-
i++;
210+
i++; // Skip comment character
152211
}
153-
} else {
154-
result[j++] = tok_mode->last_expr_buffer[i];
212+
}
213+
// Copy any other character unchanged
214+
else {
215+
result[j++] = ch;
155216
}
156217
}
157218

@@ -164,11 +225,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
164225
tok_mode->last_expr_size - tok_mode->last_expr_end,
165226
NULL
166227
);
167-
168228
}
169229

170-
171-
if (!res) {
230+
if (!res) {
172231
return -1;
173232
}
174233
token->metadata = res;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy