Skip to content

py: Implement partial PEP-498 (f-string) support (v2) #6247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mpy-cross/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@
#define MICROPY_PY_IO (0)
#define MICROPY_PY_SYS (0)

#define MICROPY_PY_FSTRING (1)

// type definitions for the specific machine

#ifdef __LP64__
Expand Down
1 change: 1 addition & 0 deletions ports/esp32/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
#define MICROPY_PY_MATH_SPECIAL_FUNCTIONS (1)
#define MICROPY_PY_MATH_ISCLOSE (1)
#define MICROPY_PY_CMATH (1)
#define MICROPY_PY_FSTRING (1)
#define MICROPY_PY_GC (1)
#define MICROPY_PY_IO (1)
#define MICROPY_PY_IO_IOBASE (1)
Expand Down
3 changes: 3 additions & 0 deletions ports/stm32/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@
#define MICROPY_PY_MATH_ISCLOSE (1)
#define MICROPY_PY_MATH_FACTORIAL (1)
#define MICROPY_PY_CMATH (1)
#ifndef MICROPY_PY_FSTRING
#define MICROPY_PY_FSTRING (1)
#endif
#define MICROPY_PY_IO (1)
#define MICROPY_PY_IO_IOBASE (1)
#define MICROPY_PY_IO_FILEIO (MICROPY_VFS_FAT || MICROPY_VFS_LFS1 || MICROPY_VFS_LFS2)
Expand Down
1 change: 1 addition & 0 deletions ports/unix/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
#define MICROPY_PY_SYS_EXC_INFO (1)
#define MICROPY_PY_COLLECTIONS_DEQUE (1)
#define MICROPY_PY_COLLECTIONS_ORDEREDDICT (1)
#define MICROPY_PY_FSTRING (1)
#ifndef MICROPY_PY_MATH_SPECIAL_FUNCTIONS
#define MICROPY_PY_MATH_SPECIAL_FUNCTIONS (1)
#endif
Expand Down
1 change: 1 addition & 0 deletions ports/windows/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
#define MICROPY_PY_SYS_EXC_INFO (1)
#define MICROPY_PY_COLLECTIONS_DEQUE (1)
#define MICROPY_PY_COLLECTIONS_ORDEREDDICT (1)
#define MICROPY_PY_FSTRING (1)
#define MICROPY_PY_MATH_SPECIAL_FUNCTIONS (1)
#define MICROPY_PY_MATH_ISCLOSE (1)
#define MICROPY_PY_CMATH (1)
Expand Down
146 changes: 142 additions & 4 deletions py/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ STATIC bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) {
return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
}

#if MICROPY_PY_FSTRING
// Test the lexer's current character (chr0) against four candidate bytes;
// true as soon as any one of them matches.
STATIC bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) {
    if (lex->chr0 == c1 || lex->chr0 == c2) {
        return true;
    }
    return lex->chr0 == c3 || lex->chr0 == c4;
}
#endif

STATIC bool is_char_following(mp_lexer_t *lex, byte c) {
return lex->chr1 == c;
}
Expand Down Expand Up @@ -105,7 +111,13 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) {

// Decide whether the characters at the lexer's current position begin a
// string or bytes literal.  Three shapes are accepted:
//   - a bare quote character (' or ");
//   - a one-letter prefix followed by a quote: r, u, b, and additionally f
//     when MICROPY_PY_FSTRING is enabled;
//   - a two-letter prefix followed by a quote: rb/br always, and rf/fr when
//     MICROPY_PY_FSTRING is enabled.
// NOTE(review): is_char_and, is_char_following_or and
// is_char_following_following_or are defined outside this view; presumably
// they test chr0+chr1, chr1 and chr2 respectively -- confirm against their
// definitions.
STATIC bool is_string_or_bytes(mp_lexer_t *lex) {
// literal with no prefix
return is_char_or(lex, '\'', '\"')
#if MICROPY_PY_FSTRING
// single-letter prefixes, including 'f' for f-strings
|| (is_char_or4(lex, 'r', 'u', 'b', 'f') && is_char_following_or(lex, '\'', '\"'))
// rf/fr are recognised as string starts here; mp_lexer_to_next then flags
// them with MP_TOKEN_FSTRING_RAW rather than parsing them
|| (((is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r'))
&& is_char_following_following_or(lex, '\'', '\"')))
#else
// single-letter prefixes when f-string support is compiled out
|| (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
#endif
// raw-bytes prefixes, in either letter order
|| ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r'))
&& is_char_following_following_or(lex, '\'', '\"'));
}
Expand All @@ -119,6 +131,31 @@ STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
return is_head_of_identifier(lex) || is_digit(lex);
}

#if MICROPY_PY_FSTRING
// Switch the lexer's three-character lookahead window (chr0..chr2) between
// the reader's stream and the pending f-string postfix buffer.
//
// The direction is chosen by lex->vstr_postfix_processing, which the caller
// sets before calling:
//   - true:  park the reader's lookahead in chr3..chr5 and preload
//            chr0..chr2 from the first three bytes of vstr_postfix;
//   - false: copy chr3..chr5 back into chr0..chr2 and clear the postfix.
STATIC void swap_char_banks(mp_lexer_t *lex) {
    if (!lex->vstr_postfix_processing) {
        // Postfix replay has finished: unconditionally reload the window from
        // the backup bank, which hands control back to the mp_reader.
        // chr3..chr5 are deliberately left untouched and must be treated as
        // garbage from this point on.
        lex->chr0 = lex->chr3;
        lex->chr1 = lex->chr4;
        lex->chr2 = lex->chr5;

        vstr_reset(&lex->vstr_postfix);
        lex->vstr_postfix_idx = 0;
        return;
    }

    // Entering postfix replay: back up the reader's lookahead first ...
    lex->chr3 = lex->chr0;
    lex->chr4 = lex->chr1;
    lex->chr5 = lex->chr2;

    // ... then prime the window with the start of the postfix text.
    lex->chr0 = lex->vstr_postfix.buf[0];
    lex->chr1 = lex->vstr_postfix.buf[1];
    lex->chr2 = lex->vstr_postfix.buf[2];

    // The next byte to be consumed from the postfix is the fourth one.
    lex->vstr_postfix_idx = 3;
}
#endif

STATIC void next_char(mp_lexer_t *lex) {
if (lex->chr0 == '\n') {
// a new line
Expand All @@ -134,7 +171,19 @@ STATIC void next_char(mp_lexer_t *lex) {

lex->chr0 = lex->chr1;
lex->chr1 = lex->chr2;
lex->chr2 = lex->reader.readbyte(lex->reader.data);

#if MICROPY_PY_FSTRING
if (lex->vstr_postfix_processing) {
if (lex->vstr_postfix_idx == lex->vstr_postfix.len) {
lex->chr2 = '\0';
} else {
lex->chr2 = lex->vstr_postfix.buf[lex->vstr_postfix_idx++];
}
} else
#endif
{
lex->chr2 = lex->reader.readbyte(lex->reader.data);
}

if (lex->chr1 == '\r') {
// CR is a new line, converted to LF
Expand All @@ -149,6 +198,13 @@ STATIC void next_char(mp_lexer_t *lex) {
if (lex->chr2 == MP_LEXER_EOF && lex->chr1 != MP_LEXER_EOF && lex->chr1 != '\n') {
lex->chr2 = '\n';
}

#if MICROPY_PY_FSTRING
if (lex->vstr_postfix_processing && lex->chr0 == '\0') {
lex->vstr_postfix_processing = false;
swap_char_banks(lex);
}
#endif
}

STATIC void indent_push(mp_lexer_t *lex, size_t indent) {
Expand Down Expand Up @@ -272,7 +328,7 @@ STATIC bool get_hex(mp_lexer_t *lex, size_t num_digits, mp_uint_t *result) {
return true;
}

STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw) {
STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring) {
// get first quoting character
char quote_char = '\'';
if (is_char(lex, '\"')) {
Expand All @@ -293,15 +349,69 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw) {
}

size_t n_closing = 0;
#if MICROPY_PY_FSTRING
bool in_expression = false;
bool expression_eat = true;
#endif

while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
if (is_char(lex, quote_char)) {
n_closing += 1;
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
} else {
n_closing = 0;

#if MICROPY_PY_FSTRING
if (is_fstring && is_char(lex, '{')) {
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
in_expression = !in_expression;
expression_eat = in_expression;

if (lex->vstr_postfix.len == 0) {
vstr_add_str(&lex->vstr_postfix, ".format(");
}

next_char(lex);
continue;
}

if (is_fstring && is_char(lex, '}')) {
vstr_add_char(&lex->vstr, CUR_CHAR(lex));

if (in_expression) {
in_expression = false;
vstr_add_char(&lex->vstr_postfix, ',');
}

next_char(lex);
continue;
}

if (in_expression) {
// throw errors for illegal chars inside f-string expressions
if (is_char(lex, '#') || is_char(lex, '\\')) {
lex->tok_kind = MP_TOKEN_MALFORMED_FSTRING;
return;
} else if (is_char(lex, ':')) {
expression_eat = false;
}

unichar c = CUR_CHAR(lex);
if (expression_eat) {
vstr_add_char(&lex->vstr_postfix, c);
} else {
vstr_add_char(&lex->vstr, c);
}

next_char(lex);
continue;
}
#endif

if (is_char(lex, '\\')) {
next_char(lex);
unichar c = CUR_CHAR(lex);

if (is_raw) {
// raw strings allow escaping of quotes, but the backslash is also emitted
vstr_add_char(&lex->vstr, '\\');
Expand Down Expand Up @@ -450,6 +560,15 @@ STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
}

void mp_lexer_to_next(mp_lexer_t *lex) {
#if MICROPY_PY_FSTRING
if (lex->vstr_postfix.len && !lex->vstr_postfix_processing) {
// end format call injection
vstr_add_char(&lex->vstr_postfix, ')');
lex->vstr_postfix_processing = true;
swap_char_banks(lex);
}
#endif

// start new token text
vstr_reset(&lex->vstr);

Expand Down Expand Up @@ -505,6 +624,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
do {
// parse type codes
bool is_raw = false;
bool is_fstring = false;
mp_token_kind_t kind = MP_TOKEN_STRING;
int n_char = 0;
if (is_char(lex, 'u')) {
Expand All @@ -523,7 +643,23 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
kind = MP_TOKEN_BYTES;
n_char = 2;
}
#if MICROPY_PY_FSTRING
if (is_char_following(lex, 'f')) {
lex->tok_kind = MP_TOKEN_FSTRING_RAW;
break;
}
#endif
}
#if MICROPY_PY_FSTRING
else if (is_char(lex, 'f')) {
if (is_char_following(lex, 'r')) {
lex->tok_kind = MP_TOKEN_FSTRING_RAW;
break;
}
n_char = 1;
is_fstring = true;
}
#endif

// Set or check token kind
if (lex->tok_kind == MP_TOKEN_END) {
Expand All @@ -542,13 +678,12 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
}

// Parse the literal
parse_string_literal(lex, is_raw);
parse_string_literal(lex, is_raw, is_fstring);

// Skip whitespace so we can check if there's another string following
skip_whitespace(lex, true);

} while (is_string_or_bytes(lex));

} else if (is_head_of_identifier(lex)) {
lex->tok_kind = MP_TOKEN_NAME;

Expand Down Expand Up @@ -702,6 +837,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
lex->num_indent_level = 1;
lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
vstr_init(&lex->vstr, 32);
#if MICROPY_PY_FSTRING
vstr_init(&lex->vstr_postfix, 0);
#endif

// store sentinel for first indentation level
lex->indent_level[0] = 0;
Expand Down
10 changes: 10 additions & 0 deletions py/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ typedef enum _mp_token_kind_t {
MP_TOKEN_INVALID,
MP_TOKEN_DEDENT_MISMATCH,
MP_TOKEN_LONELY_STRING_OPEN,
#if MICROPY_PY_FSTRING
MP_TOKEN_MALFORMED_FSTRING,
MP_TOKEN_FSTRING_RAW,
#endif

MP_TOKEN_NEWLINE,
MP_TOKEN_INDENT,
Expand Down Expand Up @@ -158,6 +162,7 @@ typedef struct _mp_lexer_t {
mp_reader_t reader; // stream source

unichar chr0, chr1, chr2; // current cached characters from source
unichar chr3, chr4, chr5; // current cached characters from alt source

size_t line; // current source line
size_t column; // current source column
Expand All @@ -173,6 +178,11 @@ typedef struct _mp_lexer_t {
size_t tok_column; // token source column
mp_token_kind_t tok_kind; // token kind
vstr_t vstr; // token data
#if MICROPY_PY_FSTRING
vstr_t vstr_postfix; // postfix to apply to string
bool vstr_postfix_processing;
uint16_t vstr_postfix_idx;
#endif
} mp_lexer_t;

mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
Expand Down
6 changes: 6 additions & 0 deletions py/mpconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,12 @@ typedef double mp_float_t;
#define MICROPY_PY_COLLECTIONS_NAMEDTUPLE__ASDICT (0)
#endif

// Whether to include (partial) support for PEP-498 f-strings in the lexer.
// Disabled by default; a port opts in by defining MICROPY_PY_FSTRING as (1)
// in its mpconfigport.h.
#ifndef MICROPY_PY_FSTRING
#define MICROPY_PY_FSTRING (0)
#endif


// Whether to provide "math" module
#ifndef MICROPY_PY_MATH
#define MICROPY_PY_MATH (1)
Expand Down
8 changes: 8 additions & 0 deletions py/parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,14 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
} else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
MP_ERROR_TEXT("unindent doesn't match any outer indent level"));
#if MICROPY_PY_FSTRING
} else if (lex->tok_kind == MP_TOKEN_MALFORMED_FSTRING) {
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
MP_ERROR_TEXT("malformed f-string"));
} else if (lex->tok_kind == MP_TOKEN_FSTRING_RAW) {
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
MP_ERROR_TEXT("raw f-strings are not supported"));
#endif
} else {
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
MP_ERROR_TEXT("invalid syntax"));
Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy