From 8039fa41935c4eb146663b34aec9a30cfdd75c1a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 25 Jun 2020 13:01:19 +0200 Subject: [PATCH] bpo-40521: Optimize PyBytes_FromStringAndSize(str, 0) Always create the empty bytes string singleton. Optimize PyBytes_FromStringAndSize(str, 0): it no longer has to check if the empty string singleton was created or not, it is always available. Add functions: * _PyBytes_Init() * bytes_get_empty(), bytes_new_empty() * bytes_create_empty_string_singleton() * unicode_create_empty_string_singleton() * _Py_unicode_state: rename empty member to empty_string --- Include/internal/pycore_interp.h | 4 +- Include/internal/pycore_pylifecycle.h | 1 + Objects/bytesobject.c | 91 +++++++++++++++++++-------- Objects/unicodeobject.c | 59 ++++++++++------- Python/pylifecycle.c | 5 ++ 5 files changed, 107 insertions(+), 53 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index bf1769e5ce2c24..cfc27470c80411 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -66,13 +66,13 @@ struct _Py_unicode_fs_codec { }; struct _Py_bytes_state { + PyObject *empty_string; PyBytesObject *characters[256]; - PyBytesObject *empty_string; }; struct _Py_unicode_state { // The empty Unicode object is a singleton to improve performance. - PyObject *empty; + PyObject *empty_string; /* Single character Unicode strings in the Latin-1 range are being shared as well. 
*/ PyObject *latin1[256]; diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 3b2173787118f9..bffc95b27e946c 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -32,6 +32,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc); /* Various one-time initializers */ extern PyStatus _PyUnicode_Init(PyThreadState *tstate); +extern PyStatus _PyBytes_Init(PyThreadState *tstate); extern int _PyStructSequence_Init(void); extern int _PyLong_Init(PyThreadState *tstate); extern PyStatus _PyTuple_Init(PyThreadState *tstate); diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ce006e15dce9ea..782bc8e1fa0b7d 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -4,8 +4,9 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytes_methods.h" -#include "pycore_object.h" +#include "pycore_bytes_methods.h" // _Py_bytes_startswith() +#include "pycore_initconfig.h" // _PyStatus_OK() +#include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pymem.h" // PYMEM_CLEANBYTE #include "pystrhex.h" @@ -41,6 +42,44 @@ get_bytes_state(void) } +// Return a borrowed reference to the empty bytes string singleton. +static inline PyObject* bytes_get_empty(void) +{ + struct _Py_bytes_state *state = get_bytes_state(); + // bytes_get_empty() must not be called before _PyBytes_Init() + // or after _PyBytes_Fini() + assert(state->empty_string != NULL); + return state->empty_string; +} + + +// Return a strong reference to the empty bytes string singleton. 
+static inline PyObject* bytes_new_empty(void) +{ + PyObject *empty = bytes_get_empty(); + Py_INCREF(empty); + return (PyObject *)empty; +} + + +static int +bytes_create_empty_string_singleton(struct _Py_bytes_state *state) +{ + // Create the empty bytes string singleton + PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE); + if (op == NULL) { + return -1; + } + _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0); + op->ob_shash = -1; + op->ob_sval[0] = '\0'; + + assert(state->empty_string == NULL); + state->empty_string = (PyObject *)op; + return 0; +} + + /* For PyBytes_FromString(), the parameter `str' points to a null-terminated string containing exactly `size' bytes. @@ -70,12 +109,7 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) assert(size >= 0); if (size == 0) { - struct _Py_bytes_state *state = get_bytes_state(); - op = state->empty_string; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + return bytes_new_empty(); } if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) { @@ -94,13 +128,8 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc) } _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size); op->ob_shash = -1; - if (!use_calloc) + if (!use_calloc) { op->ob_sval[size] = '\0'; - /* empty byte string singleton */ - if (size == 0) { - struct _Py_bytes_state *state = get_bytes_state(); - Py_INCREF(op); - state->empty_string = op; } return (PyObject *) op; } @@ -122,6 +151,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) return (PyObject *)op; } } + if (size == 0) { + return bytes_new_empty(); + } op = (PyBytesObject *)_PyBytes_FromSize(size, 0); if (op == NULL) @@ -155,11 +187,7 @@ PyBytes_FromString(const char *str) struct _Py_bytes_state *state = get_bytes_state(); if (size == 0) { - op = state->empty_string; - if (op != NULL) { - Py_INCREF(op); - return (PyObject *)op; - } + return bytes_new_empty(); } else if (size == 1) { op = state->characters[*str & UCHAR_MAX]; @@ -178,11 
+206,8 @@ PyBytes_FromString(const char *str) op->ob_shash = -1; memcpy(op->ob_sval, str, size+1); /* share short strings */ - if (size == 0) { - Py_INCREF(op); - state->empty_string = op; - } - else if (size == 1) { + if (size == 1) { + assert(state->characters[*str & UCHAR_MAX] == NULL); Py_INCREF(op); state->characters[*str & UCHAR_MAX] = op; } @@ -1272,7 +1297,7 @@ PyBytes_AsStringAndSize(PyObject *obj, /* -------------------------------------------------------------------- */ /* Methods */ -#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string +#define STRINGLIB_GET_EMPTY() bytes_get_empty() #include "stringlib/stringdefs.h" @@ -3053,9 +3078,9 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) goto error; } if (newsize == 0) { - *pv = _PyBytes_FromSize(0, 0); + *pv = bytes_new_empty(); Py_DECREF(v); - return (*pv == NULL) ? -1 : 0; + return 0; } /* XXX UNREF/NEWREF interface should be more symmetrical */ #ifdef Py_REF_DEBUG @@ -3084,6 +3109,18 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) return -1; } + +PyStatus +_PyBytes_Init(PyThreadState *tstate) +{ + struct _Py_bytes_state *state = &tstate->interp->bytes; + if (bytes_create_empty_string_singleton(state) < 0) { + return _PyStatus_NO_MEMORY(); + } + return _PyStatus_OK(); +} + + void _PyBytes_Fini(PyThreadState *tstate) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5ba99514d29691..55c886727ba2ed 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -41,16 +41,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
#define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_bytes_methods.h" -#include "pycore_fileutils.h" -#include "pycore_initconfig.h" +#include "pycore_bytes_methods.h" // _Py_bytes_lower() +#include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec -#include "pycore_object.h" -#include "pycore_pathconfig.h" -#include "pycore_pylifecycle.h" +#include "pycore_object.h" // _PyObject_GC_TRACK() +#include "pycore_pathconfig.h" // _Py_DumpPathConfig() +#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding() #include "pycore_pystate.h" // _PyInterpreterState_GET() -#include "ucnhash.h" -#include "stringlib/eq.h" +#include "ucnhash.h" // _PyUnicode_Name_CAPI +#include "stringlib/eq.h" // unicode_eq() #ifdef MS_WINDOWS #include <windows.h> @@ -236,10 +235,12 @@ static inline PyObject* unicode_get_empty(void) struct _Py_unicode_state *state = get_unicode_state(); // unicode_get_empty() must not be called before _PyUnicode_Init() // or after _PyUnicode_Fini() - assert(state->empty != NULL); - return state->empty; + assert(state->empty_string != NULL); + return state->empty_string; } + +// Return a strong reference to the empty string singleton. static inline PyObject* unicode_new_empty(void) { PyObject *empty = unicode_get_empty(); @@ -1385,6 +1386,26 @@ _PyUnicode_Dump(PyObject *op) } #endif +static int +unicode_create_empty_string_singleton(struct _Py_unicode_state *state) +{ + // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be + // optimized to always use state->empty_string without having to check if + // it is NULL or not.
+ PyObject *empty = PyUnicode_New(1, 0); + if (empty == NULL) { + return -1; + } + PyUnicode_1BYTE_DATA(empty)[0] = 0; + _PyUnicode_LENGTH(empty) = 0; + assert(_PyUnicode_CheckConsistency(empty, 1)); + + assert(state->empty_string == NULL); + state->empty_string = empty; + return 0; +} + + PyObject * PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) { @@ -1972,7 +1993,7 @@ static int unicode_is_singleton(PyObject *unicode) { struct _Py_unicode_state *state = get_unicode_state(); - if (unicode == state->empty) { + if (unicode == state->empty_string) { return 1; } PyASCIIObject *ascii = (PyASCIIObject *)unicode; @@ -15542,20 +15563,10 @@ _PyUnicode_Init(PyThreadState *tstate) 0x2029, /* PARAGRAPH SEPARATOR */ }; - // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be - // optimized to always use state->empty without having to check if it is - // NULL or not. - PyObject *empty = PyUnicode_New(1, 0); - if (empty == NULL) { + struct _Py_unicode_state *state = &tstate->interp->unicode; + if (unicode_create_empty_string_singleton(state) < 0) { return _PyStatus_NO_MEMORY(); } - PyUnicode_1BYTE_DATA(empty)[0] = 0; - _PyUnicode_LENGTH(empty) = 0; - assert(_PyUnicode_CheckConsistency(empty, 1)); - - struct _Py_unicode_state *state = &tstate->interp->unicode; - assert(state->empty == NULL); - state->empty = empty; if (_Py_IsMainInterpreter(tstate)) { /* initialize the linebreak bloom filter */ @@ -16223,7 +16234,7 @@ _PyUnicode_Fini(PyThreadState *tstate) #endif /* __INSURE__ */ } - Py_CLEAR(state->empty); + Py_CLEAR(state->empty_string); for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 4b658f847bc12b..cd993ea13418ff 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -607,6 +607,11 @@ pycore_init_types(PyThreadState *tstate) return status; } + status = _PyBytes_Init(tstate); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + status = _PyExc_Init(tstate); if 
(_PyStatus_EXCEPTION(status)) { return status; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy