From 8ac838a4d52b1ca150825aaf9082c9541d7d4116 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 24 Jun 2020 00:03:12 +0200 Subject: [PATCH] bpo-40521: Optimize PyUnicode_New(0, maxchar) Functions of unicodeobject.c, like PyUnicode_New(), no longer check if the empty Unicode singleton has been initialized or not. Consider that it is always initialized. The Unicode API must not be used before _PyUnicode_Init() or after _PyUnicode_Fini(). --- Objects/unicodeobject.c | 80 +++++++++++++---------------------------- 1 file changed, 25 insertions(+), 55 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 06ca7a5751d2fe..e4235b1aca3cf6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -231,28 +231,19 @@ get_unicode_state(void) // Return a borrowed reference to the empty string singleton. -// Return NULL if the singleton was not created yet. static inline PyObject* unicode_get_empty(void) { struct _Py_unicode_state *state = get_unicode_state(); + // unicode_get_empty() must not be called before _PyUnicode_Init() + // or after _PyUnicode_Fini() + assert(state->empty != NULL); return state->empty; } static inline PyObject* unicode_new_empty(void) { - struct _Py_unicode_state *state = get_unicode_state(); - PyObject *empty = state->empty; - if (empty != NULL) { - Py_INCREF(empty); - } - else { - empty = PyUnicode_New(0, 0); - if (empty != NULL) { - Py_INCREF(empty); - assert(_PyUnicode_CheckConsistency(empty, 1)); - state->empty = empty; - } - } + PyObject *empty = unicode_get_empty(); + Py_INCREF(empty); return empty; } @@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode) PyObject *empty = unicode_get_empty(); if (unicode != empty) { Py_DECREF(unicode); - Py_INCREF(empty); - return empty; } - // unicode is the empty string singleton - return unicode; + return empty; } #ifdef LATIN1_SINGLETONS @@ -959,7 +947,7 @@ ensure_unicode(PyObject *obj) /* Compilation of templated routines */ -#define STRINGLIB_GET_EMPTY() unicode_get_empty() +#define STRINGLIB_GET_EMPTY() unicode_get_empty() #include "stringlib/asciilib.h" #include "stringlib/fastsearch.h" @@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length) /* Optimization for empty strings */ if (length == 0) { - PyObject *empty = unicode_get_empty(); - if (empty != NULL) { - Py_INCREF(empty); - return (PyUnicodeObject *)empty; - } + return (PyUnicodeObject *)unicode_new_empty(); } /* Ensure we won't overflow the size. */ @@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) { /* Optimization for empty strings */ if (size == 0) { - PyObject *empty = unicode_get_empty(); - if (empty != NULL) { - Py_INCREF(empty); - return empty; - } + return unicode_new_empty(); } PyObject *obj; @@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode) static int unicode_is_singleton(PyObject *unicode) { - struct _Py_unicode_state *state = get_unicode_state(); - if (unicode == state->empty) { + if (unicode == unicode_get_empty()) { return 1; } #ifdef LATIN1_SINGLETONS @@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) if (length == 0) { PyObject *empty = unicode_new_empty(); - if (!empty) - return -1; Py_SETREF(*p_unicode, empty); return 0; } @@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1, } new_size = slen + n * (len2 - len1); if (new_size == 0) { - PyObject *empty = unicode_new_empty(); - if (!empty) - goto error; - u = empty; + u = unicode_new_empty(); goto done; } if (new_size > (PY_SSIZE_T_MAX / rkind)) { @@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj) len2 = PyUnicode_GET_LENGTH(sep_obj); if (kind1 < kind2 || len1 < len2) { PyObject *empty = unicode_get_empty(); // Borrowed reference - if (!empty) { - out = NULL; - } - else { - out = PyTuple_Pack(3, str_obj, empty, empty); - } - return out; + return PyTuple_Pack(3, str_obj, empty, empty); } buf1 = PyUnicode_DATA(str_obj); buf2 = PyUnicode_DATA(sep_obj); @@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj) len2 = PyUnicode_GET_LENGTH(sep_obj); if (kind1 < kind2 || len1 < len2) { PyObject *empty = unicode_get_empty(); // Borrowed reference - if (!empty) { - out = NULL; - } - else { - out = PyTuple_Pack(3, empty, empty, str_obj); - } - return out; + return PyTuple_Pack(3, empty, empty, str_obj); } buf1 = PyUnicode_DATA(str_obj); buf2 = PyUnicode_DATA(sep_obj); @@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate) 0x2029, /* PARAGRAPH SEPARATOR */ }; - /* Init the implementation */ - PyObject *empty = unicode_new_empty(); - if (!empty) { + // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be + // optimized to always use state->empty without having to check if it is + // NULL or not. + PyObject *empty = PyUnicode_New(1, 0); + if (empty == NULL) { return _PyStatus_NO_MEMORY(); } - Py_DECREF(empty); + PyUnicode_1BYTE_DATA(empty)[0] = 0; + _PyUnicode_LENGTH(empty) = 0; + assert(_PyUnicode_CheckConsistency(empty, 1)); + + struct _Py_unicode_state *state = &tstate->interp->unicode; + assert(state->empty == NULL); + state->empty = empty; if (_Py_IsMainInterpreter(tstate)) { /* initialize the linebreak bloom filter */ pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy