Skip to content

Commit 91698d8

Browse files
authored
bpo-40521: Optimize PyBytes_FromStringAndSize(str, 0) (GH-21142)
Always create the empty bytes string singleton.

Optimize PyBytes_FromStringAndSize(str, 0): it no longer has to check whether the empty string singleton has been created, because the singleton is always available.

Add functions:

* _PyBytes_Init()
* bytes_get_empty(), bytes_new_empty()
* bytes_create_empty_string_singleton()
* unicode_create_empty_string_singleton()

_Py_unicode_state: rename the empty structure member to empty_string.
1 parent 0f8ec1f commit 91698d8

File tree

5 files changed

+107
-53
lines changed

5 files changed

+107
-53
lines changed

Include/internal/pycore_interp.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,13 @@ struct _Py_unicode_fs_codec {
6666
};
6767

6868
struct _Py_bytes_state {
69+
PyObject *empty_string;
6970
PyBytesObject *characters[256];
70-
PyBytesObject *empty_string;
7171
};
7272

7373
struct _Py_unicode_state {
7474
// The empty Unicode object is a singleton to improve performance.
75-
PyObject *empty;
75+
PyObject *empty_string;
7676
/* Single character Unicode strings in the Latin-1 range are being
7777
shared as well. */
7878
PyObject *latin1[256];

Include/internal/pycore_pylifecycle.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
3232
/* Various one-time initializers */
3333

3434
extern PyStatus _PyUnicode_Init(PyThreadState *tstate);
35+
extern PyStatus _PyBytes_Init(PyThreadState *tstate);
3536
extern int _PyStructSequence_Init(void);
3637
extern int _PyLong_Init(PyThreadState *tstate);
3738
extern PyStatus _PyTuple_Init(PyThreadState *tstate);

Objects/bytesobject.c

Lines changed: 64 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
#include "Python.h"
66
#include "pycore_abstract.h" // _PyIndex_Check()
7-
#include "pycore_bytes_methods.h"
8-
#include "pycore_object.h"
7+
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
8+
#include "pycore_initconfig.h" // _PyStatus_OK()
9+
#include "pycore_object.h" // _PyObject_GC_TRACK
910
#include "pycore_pymem.h" // PYMEM_CLEANBYTE
1011

1112
#include "pystrhex.h"
@@ -41,6 +42,44 @@ get_bytes_state(void)
4142
}
4243

4344

45+
// Return a borrowed reference to the empty bytes string singleton.
46+
static inline PyObject* bytes_get_empty(void)
47+
{
48+
struct _Py_bytes_state *state = get_bytes_state();
49+
// bytes_get_empty() must not be called before _PyBytes_Init()
50+
// or after _PyBytes_Fini()
51+
assert(state->empty_string != NULL);
52+
return state->empty_string;
53+
}
54+
55+
56+
// Return a strong reference to the empty bytes string singleton.
57+
static inline PyObject* bytes_new_empty(void)
58+
{
59+
PyObject *empty = bytes_get_empty();
60+
Py_INCREF(empty);
61+
return (PyObject *)empty;
62+
}
63+
64+
65+
static int
66+
bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
67+
{
68+
// Create the empty bytes string singleton
69+
PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
70+
if (op == NULL) {
71+
return -1;
72+
}
73+
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
74+
op->ob_shash = -1;
75+
op->ob_sval[0] = '\0';
76+
77+
assert(state->empty_string == NULL);
78+
state->empty_string = (PyObject *)op;
79+
return 0;
80+
}
81+
82+
4483
/*
4584
For PyBytes_FromString(), the parameter `str' points to a null-terminated
4685
string containing exactly `size' bytes.
@@ -70,12 +109,7 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
70109
assert(size >= 0);
71110

72111
if (size == 0) {
73-
struct _Py_bytes_state *state = get_bytes_state();
74-
op = state->empty_string;
75-
if (op != NULL) {
76-
Py_INCREF(op);
77-
return (PyObject *)op;
78-
}
112+
return bytes_new_empty();
79113
}
80114

81115
if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
@@ -94,13 +128,8 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
94128
}
95129
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
96130
op->ob_shash = -1;
97-
if (!use_calloc)
131+
if (!use_calloc) {
98132
op->ob_sval[size] = '\0';
99-
/* empty byte string singleton */
100-
if (size == 0) {
101-
struct _Py_bytes_state *state = get_bytes_state();
102-
Py_INCREF(op);
103-
state->empty_string = op;
104133
}
105134
return (PyObject *) op;
106135
}
@@ -122,6 +151,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
122151
return (PyObject *)op;
123152
}
124153
}
154+
if (size == 0) {
155+
return bytes_new_empty();
156+
}
125157

126158
op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
127159
if (op == NULL)
@@ -155,11 +187,7 @@ PyBytes_FromString(const char *str)
155187

156188
struct _Py_bytes_state *state = get_bytes_state();
157189
if (size == 0) {
158-
op = state->empty_string;
159-
if (op != NULL) {
160-
Py_INCREF(op);
161-
return (PyObject *)op;
162-
}
190+
return bytes_new_empty();
163191
}
164192
else if (size == 1) {
165193
op = state->characters[*str & UCHAR_MAX];
@@ -178,11 +206,8 @@ PyBytes_FromString(const char *str)
178206
op->ob_shash = -1;
179207
memcpy(op->ob_sval, str, size+1);
180208
/* share short strings */
181-
if (size == 0) {
182-
Py_INCREF(op);
183-
state->empty_string = op;
184-
}
185-
else if (size == 1) {
209+
if (size == 1) {
210+
assert(state->characters[*str & UCHAR_MAX] == NULL);
186211
Py_INCREF(op);
187212
state->characters[*str & UCHAR_MAX] = op;
188213
}
@@ -1272,7 +1297,7 @@ PyBytes_AsStringAndSize(PyObject *obj,
12721297
/* -------------------------------------------------------------------- */
12731298
/* Methods */
12741299

1275-
#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string
1300+
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
12761301

12771302
#include "stringlib/stringdefs.h"
12781303

@@ -3053,9 +3078,9 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
30533078
goto error;
30543079
}
30553080
if (newsize == 0) {
3056-
*pv = _PyBytes_FromSize(0, 0);
3081+
*pv = bytes_new_empty();
30573082
Py_DECREF(v);
3058-
return (*pv == NULL) ? -1 : 0;
3083+
return 0;
30593084
}
30603085
/* XXX UNREF/NEWREF interface should be more symmetrical */
30613086
#ifdef Py_REF_DEBUG
@@ -3084,6 +3109,18 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
30843109
return -1;
30853110
}
30863111

3112+
3113+
PyStatus
3114+
_PyBytes_Init(PyThreadState *tstate)
3115+
{
3116+
struct _Py_bytes_state *state = &tstate->interp->bytes;
3117+
if (bytes_create_empty_string_singleton(state) < 0) {
3118+
return _PyStatus_NO_MEMORY();
3119+
}
3120+
return _PyStatus_OK();
3121+
}
3122+
3123+
30873124
void
30883125
_PyBytes_Fini(PyThreadState *tstate)
30893126
{

Objects/unicodeobject.c

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4141
#define PY_SSIZE_T_CLEAN
4242
#include "Python.h"
4343
#include "pycore_abstract.h" // _PyIndex_Check()
44-
#include "pycore_bytes_methods.h"
45-
#include "pycore_fileutils.h"
46-
#include "pycore_initconfig.h"
44+
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
45+
#include "pycore_initconfig.h" // _PyStatus_OK()
4746
#include "pycore_interp.h" // PyInterpreterState.fs_codec
48-
#include "pycore_object.h"
49-
#include "pycore_pathconfig.h"
50-
#include "pycore_pylifecycle.h"
47+
#include "pycore_object.h" // _PyObject_GC_TRACK()
48+
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
49+
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
5150
#include "pycore_pystate.h" // _PyInterpreterState_GET()
52-
#include "ucnhash.h"
53-
#include "stringlib/eq.h"
51+
#include "ucnhash.h" // _PyUnicode_Name_CAPI
52+
#include "stringlib/eq.h" // unicode_eq()
5453

5554
#ifdef MS_WINDOWS
5655
#include <windows.h>
@@ -236,10 +235,12 @@ static inline PyObject* unicode_get_empty(void)
236235
struct _Py_unicode_state *state = get_unicode_state();
237236
// unicode_get_empty() must not be called before _PyUnicode_Init()
238237
// or after _PyUnicode_Fini()
239-
assert(state->empty != NULL);
240-
return state->empty;
238+
assert(state->empty_string != NULL);
239+
return state->empty_string;
241240
}
242241

242+
243+
// Return a strong reference to the empty string singleton.
243244
static inline PyObject* unicode_new_empty(void)
244245
{
245246
PyObject *empty = unicode_get_empty();
@@ -1385,6 +1386,26 @@ _PyUnicode_Dump(PyObject *op)
13851386
}
13861387
#endif
13871388

1389+
static int
1390+
unicode_create_empty_string_singleton(struct _Py_unicode_state *state)
1391+
{
1392+
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
1393+
// optimized to always use state->empty_string without having to check if
1394+
// it is NULL or not.
1395+
PyObject *empty = PyUnicode_New(1, 0);
1396+
if (empty == NULL) {
1397+
return -1;
1398+
}
1399+
PyUnicode_1BYTE_DATA(empty)[0] = 0;
1400+
_PyUnicode_LENGTH(empty) = 0;
1401+
assert(_PyUnicode_CheckConsistency(empty, 1));
1402+
1403+
assert(state->empty_string == NULL);
1404+
state->empty_string = empty;
1405+
return 0;
1406+
}
1407+
1408+
13881409
PyObject *
13891410
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
13901411
{
@@ -1972,7 +1993,7 @@ static int
19721993
unicode_is_singleton(PyObject *unicode)
19731994
{
19741995
struct _Py_unicode_state *state = get_unicode_state();
1975-
if (unicode == state->empty) {
1996+
if (unicode == state->empty_string) {
19761997
return 1;
19771998
}
19781999
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
@@ -15542,20 +15563,10 @@ _PyUnicode_Init(PyThreadState *tstate)
1554215563
0x2029, /* PARAGRAPH SEPARATOR */
1554315564
};
1554415565

15545-
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
15546-
// optimized to always use state->empty without having to check if it is
15547-
// NULL or not.
15548-
PyObject *empty = PyUnicode_New(1, 0);
15549-
if (empty == NULL) {
15566+
struct _Py_unicode_state *state = &tstate->interp->unicode;
15567+
if (unicode_create_empty_string_singleton(state) < 0) {
1555015568
return _PyStatus_NO_MEMORY();
1555115569
}
15552-
PyUnicode_1BYTE_DATA(empty)[0] = 0;
15553-
_PyUnicode_LENGTH(empty) = 0;
15554-
assert(_PyUnicode_CheckConsistency(empty, 1));
15555-
15556-
struct _Py_unicode_state *state = &tstate->interp->unicode;
15557-
assert(state->empty == NULL);
15558-
state->empty = empty;
1555915570

1556015571
if (_Py_IsMainInterpreter(tstate)) {
1556115572
/* initialize the linebreak bloom filter */
@@ -16223,7 +16234,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
1622316234
#endif /* __INSURE__ */
1622416235
}
1622516236

16226-
Py_CLEAR(state->empty);
16237+
Py_CLEAR(state->empty_string);
1622716238

1622816239
for (Py_ssize_t i = 0; i < 256; i++) {
1622916240
Py_CLEAR(state->latin1[i]);

Python/pylifecycle.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,11 @@ pycore_init_types(PyThreadState *tstate)
607607
return status;
608608
}
609609

610+
status = _PyBytes_Init(tstate);
611+
if (_PyStatus_EXCEPTION(status)) {
612+
return status;
613+
}
614+
610615
status = _PyExc_Init(tstate);
611616
if (_PyStatus_EXCEPTION(status)) {
612617
return status;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy