Skip to content

Commit 5150795

Browse files
authored
gh-119182: Optimize PyUnicode_FromFormat() (#120796)
Use strchr() and ucs1lib_find_max_char() to optimize the code path that formats the sub-strings between '%' format specifiers.
1 parent 85d90b5 commit 5150795

File tree

1 file changed

+26
-26
lines changed

1 file changed

+26
-26
lines changed

Objects/unicodeobject.c

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2875,47 +2875,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
28752875
static int
28762876
unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
28772877
{
2878-
writer->min_length += strlen(format) + 100;
2878+
Py_ssize_t len = strlen(format);
2879+
writer->min_length += len + 100;
28792880
writer->overallocate = 1;
28802881

2881-
va_list vargs2;
2882-
const char *f;
2883-
28842882
// Copy varargs to be able to pass a reference to a subfunction.
2883+
va_list vargs2;
28852884
va_copy(vargs2, vargs);
28862885

2887-
for (f = format; *f; ) {
2886+
// _PyUnicodeWriter_WriteASCIIString() below requires the format string
2887+
// to be encoded to ASCII.
2888+
int is_ascii = (ucs1lib_find_max_char((Py_UCS1*)format, (Py_UCS1*)format + len) < 128);
2889+
if (!is_ascii) {
2890+
Py_ssize_t i;
2891+
for (i=0; i < len && (unsigned char)format[i] <= 127; i++);
2892+
PyErr_Format(PyExc_ValueError,
2893+
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
2894+
"string, got a non-ASCII byte: 0x%02x",
2895+
(unsigned char)format[i]);
2896+
goto fail;
2897+
}
2898+
2899+
for (const char *f = format; *f; ) {
28882900
if (*f == '%') {
28892901
f = unicode_fromformat_arg(writer, f, &vargs2);
28902902
if (f == NULL)
28912903
goto fail;
28922904
}
28932905
else {
2894-
const char *p;
2895-
Py_ssize_t len;
2896-
2897-
p = f;
2898-
do
2899-
{
2900-
if ((unsigned char)*p > 127) {
2901-
PyErr_Format(PyExc_ValueError,
2902-
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
2903-
"string, got a non-ASCII byte: 0x%02x",
2904-
(unsigned char)*p);
2905-
goto fail;
2906-
}
2907-
p++;
2906+
const char *p = strchr(f, '%');
2907+
if (p != NULL) {
2908+
len = p - f;
29082909
}
2909-
while (*p != '\0' && *p != '%');
2910-
len = p - f;
2911-
2912-
if (*p == '\0')
2910+
else {
2911+
len = strlen(f);
29132912
writer->overallocate = 0;
2913+
}
29142914

2915-
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
2915+
if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) {
29162916
goto fail;
2917-
2918-
f = p;
2917+
}
2918+
f += len;
29192919
}
29202920
}
29212921
va_end(vargs2);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy