From 574e48c060ff85c335bcca5035d8c23c8158c9e5 Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Wed, 27 Mar 2024 16:06:31 -0700 Subject: [PATCH 1/4] Make _codecs module thread-safe --- Include/internal/pycore_codecs.h | 28 ++++ Include/internal/pycore_interp.h | 6 +- .../internal/pycore_pyatomic_ft_wrappers.h | 7 + Python/codecs.c | 158 ++++++++++++------ Python/pystate.c | 6 +- 5 files changed, 146 insertions(+), 59 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index a2a7151d50ade7..258de217632d4c 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -8,6 +8,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_lock.h" // PyMutex + extern PyObject* _PyCodec_Lookup(const char *encoding); /* Text codec specific encoding and decoding API. @@ -48,6 +50,32 @@ extern PyObject* _PyCodecInfo_GetIncrementalEncoder( PyObject *codec_info, const char *errors); +// Per-interpreter state used by codecs.c. +struct codecs_state { + // A list of callable objects used to search for codecs. + PyObject *search_path; + + // A dict mapping codec names to codecs returned from a callable in + // search_path. + PyObject *search_cache; + + // A dict mapping error handling strategies to functions to implement them. + PyObject *error_registry; + +#ifdef Py_GIL_DISABLED + // Used to safely delete a specific item from search_path. + PyMutex search_path_mutex; + + // Used to synchronize initialization of the PyObject* members above. + PyMutex init_mutex; +#endif + + // If an acquire load of initialized yields 1, all of the PyObject* members + // above will be set, and their values will not change until interpreter + // finalization. This allows common operations to freely read them without + // additional synchronization. + int initialized; +}; #ifdef __cplusplus } diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index b8d0fdcce11ba8..3f1ae78b7f7014 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_atexit.h" // struct atexit_state #include "pycore_ceval_state.h" // struct _ceval_state #include "pycore_code.h" // struct callable_cache +#include "pycore_codecs.h" // struct codecs_state #include "pycore_context.h" // struct _Py_context_state #include "pycore_crossinterp.h" // struct _xidregistry #include "pycore_dict_state.h" // struct _Py_dict_state @@ -164,10 +165,7 @@ struct _is { possible to facilitate out-of-process observability tools. */ - PyObject *codec_search_path; - PyObject *codec_search_cache; - PyObject *codec_error_registry; - int codecs_initialized; + struct codecs_state codecs; PyConfig config; unsigned long feature_flags; diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index e441600d54e1aa..10d74958f86717 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -23,18 +23,25 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ _Py_atomic_load_ssize_relaxed(&value) +#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) _Py_atomic_load_int_acquire(&value) + #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) \ + _Py_atomic_store_int_release(&value, new_value) #else #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value +#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) value + #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) value = new_value #endif #ifdef __cplusplus diff --git a/Python/codecs.c b/Python/codecs.c index d8fe7b22063a80..9eda246d34c0c9 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -11,6 +11,8 @@ Copyright (c) Corporation for National Research Initiatives. #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_interp.h" // PyInterpreterState.codec_search_path +#include "pycore_lock.h" // PyMutex +#include "pycore_pyatomic_ft_wrappers.h" #include "pycore_pyerrors.h" // _PyErr_FormatNote() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI @@ -30,13 +32,15 @@ const char *Py_hexdigits = "0123456789abcdef"; */ -static int _PyCodecRegistry_Init(void); /* Forward */ +static int _PyCodecRegistry_EnsureInit(PyInterpreterState *); /* Forward */ int PyCodec_Register(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) + if (_PyCodecRegistry_EnsureInit(interp) < 0) { goto onError; + } + PyObject *search_path = interp->codecs.search_path; if (search_function == NULL) { PyErr_BadArgument(); goto onError; @@ -45,7 +49,14 @@ int PyCodec_Register(PyObject *search_function) PyErr_SetString(PyExc_TypeError, "argument must be callable"); goto onError; } - return PyList_Append(interp->codec_search_path, search_function); +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.search_path_mutex); +#endif + int ret = PyList_Append(search_path, search_function); +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.search_path_mutex); +#endif + return ret; onError: return -1; @@ -55,22 +66,33 @@ int PyCodec_Unregister(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *codec_search_path = interp->codec_search_path; - /* Do nothing if codec_search_path is not created yet or was cleared. */ - if (codec_search_path == NULL) { + /* Do nothing if codec data structures are not created yet. */ + if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 0) { return 0; } + PyObject *codec_search_path = interp->codecs.search_path; assert(PyList_CheckExact(codec_search_path)); - Py_ssize_t n = PyList_GET_SIZE(codec_search_path); - for (Py_ssize_t i = 0; i < n; i++) { - PyObject *item = PyList_GET_ITEM(codec_search_path, i); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(codec_search_path); i++) { +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.search_path_mutex); +#endif + PyObject *item = PyList_GetItemRef(codec_search_path, i); + int ret = 1; if (item == search_function) { - if (interp->codec_search_cache != NULL) { - assert(PyDict_CheckExact(interp->codec_search_cache)); - PyDict_Clear(interp->codec_search_cache); - } - return PyList_SetSlice(codec_search_path, i, i+1, NULL); + // We hold a reference to the item, so its destructor can't run + // while we hold search_path_mutex. + ret = PyList_SetSlice(codec_search_path, i, i+1, NULL); + } +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.search_path_mutex); +#endif + Py_DECREF(item); + if (ret != 1) { + assert(interp->codecs.search_cache != NULL); + assert(PyDict_CheckExact(interp->codecs.search_cache)); + PyDict_Clear(interp->codecs.search_cache); + return ret; } } return 0; @@ -132,7 +154,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) { + if (_PyCodecRegistry_EnsureInit(interp) < 0) { return NULL; } @@ -147,7 +169,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) /* First, try to lookup the name in the registry dictionary */ PyObject *result; - if (PyDict_GetItemRef(interp->codec_search_cache, v, &result) < 0) { + if (PyDict_GetItemRef(interp->codecs.search_cache, v, &result) < 0) { goto onError; } if (result != NULL) { @@ -156,7 +178,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } /* Next, scan the search functions in order of registration */ - const Py_ssize_t len = PyList_Size(interp->codec_search_path); + const Py_ssize_t len = PyList_Size(interp->codecs.search_path); if (len < 0) goto onError; if (len == 0) { @@ -170,14 +192,15 @@ PyObject *_PyCodec_Lookup(const char *encoding) for (i = 0; i < len; i++) { PyObject *func; - func = PyList_GetItem(interp->codec_search_path, i); + func = PyList_GetItemRef(interp->codecs.search_path, i); if (func == NULL) goto onError; result = PyObject_CallOneArg(func, v); + Py_DECREF(func); if (result == NULL) goto onError; if (result == Py_None) { - Py_DECREF(result); + Py_CLEAR(result); continue; } if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { @@ -188,7 +211,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } break; } - if (i == len) { + if (result == NULL) { /* XXX Perhaps we should cache misses too ? */ PyErr_Format(PyExc_LookupError, "unknown encoding: %s", encoding); @@ -196,7 +219,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } /* Cache and return the result */ - if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) { + if (PyDict_SetItem(interp->codecs.search_cache, v, result) < 0) { Py_DECREF(result); goto onError; } @@ -600,13 +623,14 @@ PyObject *_PyCodec_DecodeText(PyObject *object, int PyCodec_RegisterError(const char *name, PyObject *error) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) + if (_PyCodecRegistry_EnsureInit(interp) < 0) { return -1; + } if (!PyCallable_Check(error)) { PyErr_SetString(PyExc_TypeError, "handler must be callable"); return -1; } - return PyDict_SetItemString(interp->codec_error_registry, + return PyDict_SetItemString(interp->codecs.error_registry, name, error); } @@ -616,13 +640,14 @@ int PyCodec_RegisterError(const char *name, PyObject *error) PyObject *PyCodec_LookupError(const char *name) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) + if (_PyCodecRegistry_EnsureInit(interp) < 0) { return NULL; + } if (name==NULL) name = "strict"; PyObject *handler; - if (PyDict_GetItemStringRef(interp->codec_error_registry, name, &handler) < 0) { + if (PyDict_GetItemStringRef(interp->codecs.error_registry, name, &handler) < 0) { return NULL; } if (handler == NULL) { @@ -1375,7 +1400,7 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateEscapeErrors(exc); } -static int _PyCodecRegistry_Init(void) +static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp) { static struct { const char *name; @@ -1463,45 +1488,74 @@ static int _PyCodecRegistry_Init(void) } }; - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *mod; - - if (interp->codec_search_path != NULL) + if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 1) { return 0; - - interp->codec_search_path = PyList_New(0); - if (interp->codec_search_path == NULL) { - return -1; } - interp->codec_search_cache = PyDict_New(); - if (interp->codec_search_cache == NULL) { - return -1; + PyObject *search_path = NULL, *search_cache = NULL, *error_registry = NULL; + search_path = PyList_New(0); + if (search_path == NULL) { + goto error; } - - interp->codec_error_registry = PyDict_New(); - if (interp->codec_error_registry == NULL) { - return -1; + search_cache = PyDict_New(); + if (search_cache == NULL) { + goto error; + } + error_registry = PyDict_New(); + if (error_registry == NULL) { + goto error; } - for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); - if (!func) { - return -1; + if (func == NULL) { + goto error; } - int res = PyCodec_RegisterError(methods[i].name, func); + int res = PyDict_SetItemString(error_registry, methods[i].name, func); Py_DECREF(func); - if (res) { - return -1; + if (res < 0) { + goto error; } } - mod = PyImport_ImportModule("encodings"); - if (mod == NULL) { - return -1; +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.init_mutex); +#endif + int do_import = 1; + if (interp->codecs.initialized == 0) { + interp->codecs.search_path = search_path; + interp->codecs.search_cache = search_cache; + interp->codecs.error_registry = error_registry; + FT_ATOMIC_STORE_INT_RELEASE(interp->codecs.initialized, 1); + } else { + // Another thread initialized everything while we were preparing. + Py_DECREF(search_path); + Py_DECREF(search_cache); + Py_DECREF(error_registry); + do_import = 0; + } + + // Importing `encodings' can execute arbitrary code and will call back into + // this module to register codec search functions. Do it once everything is + // initialized and we hold no locks. Other Python code may register other + // codecs before `encodings' is finished importing; this is true with or + // without the GIL. +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.init_mutex); +#endif + + if (do_import) { + PyObject *mod = PyImport_ImportModule("encodings"); + if (mod == NULL) { + return -1; + } + Py_DECREF(mod); } - Py_DECREF(mod); - interp->codecs_initialized = 1; return 0; + + error: + Py_XDECREF(search_path); + Py_XDECREF(search_cache); + Py_XDECREF(error_registry); + return -1; } diff --git a/Python/pystate.c b/Python/pystate.c index 925d1cff866f18..dcc8cf40c8ac4d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -821,9 +821,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } PyConfig_Clear(&interp->config); - Py_CLEAR(interp->codec_search_path); - Py_CLEAR(interp->codec_search_cache); - Py_CLEAR(interp->codec_error_registry); + Py_CLEAR(interp->codecs.search_path); + Py_CLEAR(interp->codecs.search_cache); + Py_CLEAR(interp->codecs.error_registry); assert(interp->imports.modules == NULL); assert(interp->imports.modules_by_index == NULL); From d8bcdd8f9bc2cac51b8477143a5b861d302aa00b Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Thu, 4 Apr 2024 10:39:26 -0700 Subject: [PATCH 2/4] Try to fix check-c-globals --- Tools/c-analyzer/cpython/ignored.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 965346b9b04a32..7463eeffc1378b 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -343,7 +343,7 @@ Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - Python/codecs.c - ucnhash_capi - -Python/codecs.c _PyCodecRegistry_Init methods - +Python/codecs.c _PyCodecRegistry_EnsureInit methods - Python/compile.c - NO_LABEL - Python/compile.c - NO_LOCATION - Python/dynload_shlib.c - _PyImport_DynLoadFiletab - From 8076032203cd345ff60839b964898cb61df4f55b Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Wed, 10 Apr 2024 16:36:00 -0700 Subject: [PATCH 3/4] Initialize codec registry eagerly, greatly simplifying things --- Include/internal/pycore_codecs.h | 13 +-- .../internal/pycore_pyatomic_ft_wrappers.h | 7 -- Objects/object.c | 1 - Objects/unicodeobject.c | 6 +- Python/codecs.c | 105 ++++++------------ Python/pystate.c | 1 + Tools/c-analyzer/cpython/ignored.tsv | 2 +- 7 files changed, 44 insertions(+), 91 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index 258de217632d4c..dd4e175cf5eeda 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -10,6 +10,11 @@ extern "C" { #include "pycore_lock.h" // PyMutex +/* Initialize codecs-related state for the given interpreter. Must be called + before any other _PyCodec* functions, and while only one thread is + active. */ +extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp); + extern PyObject* _PyCodec_Lookup(const char *encoding); /* Text codec specific encoding and decoding API. @@ -65,15 +70,9 @@ struct codecs_state { #ifdef Py_GIL_DISABLED // Used to safely delete a specific item from search_path. PyMutex search_path_mutex; - - // Used to synchronize initialization of the PyObject* members above. - PyMutex init_mutex; #endif - // If an acquire load of initialized yields 1, all of the PyObject* members - // above will be set, and their values will not change until interpreter - // finalization. This allows common operations to freely read them without - // additional synchronization. + // Whether or not the rest of the state is initialized. int initialized; }; diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 10d74958f86717..e441600d54e1aa 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -23,25 +23,18 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ _Py_atomic_load_ssize_relaxed(&value) -#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) _Py_atomic_load_int_acquire(&value) - #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) -#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) \ - _Py_atomic_store_int_release(&value, new_value) #else #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value -#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) value - #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value -#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) value = new_value #endif #ifdef __cplusplus diff --git a/Objects/object.c b/Objects/object.c index 60642d899bcafa..06cddf7077c51b 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -467,7 +467,6 @@ void PyObject_CallFinalizer(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); - if (tp->tp_finalize == NULL) return; /* tp_finalize should only be called once. */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e412af5f797e7a..7d94ee85be1317 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15452,7 +15452,11 @@ init_fs_encoding(PyThreadState *tstate) PyStatus _PyUnicode_InitEncodings(PyThreadState *tstate) { - PyStatus status = init_fs_encoding(tstate); + PyStatus status = _PyCodec_InitRegistry(tstate->interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + status = init_fs_encoding(tstate); if (_PyStatus_EXCEPTION(status)) { return status; } diff --git a/Python/codecs.c b/Python/codecs.c index 9eda246d34c0c9..a02e20f4683452 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -12,7 +12,6 @@ Copyright (c) Corporation for National Research Initiatives. #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_interp.h" // PyInterpreterState.codec_search_path #include "pycore_lock.h" // PyMutex -#include "pycore_pyatomic_ft_wrappers.h" #include "pycore_pyerrors.h" // _PyErr_FormatNote() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI @@ -32,15 +31,10 @@ const char *Py_hexdigits = "0123456789abcdef"; */ -static int _PyCodecRegistry_EnsureInit(PyInterpreterState *); /* Forward */ - int PyCodec_Register(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - goto onError; - } - PyObject *search_path = interp->codecs.search_path; + assert(interp->codecs.initialized); if (search_function == NULL) { PyErr_BadArgument(); goto onError; @@ -52,7 +46,7 @@ int PyCodec_Register(PyObject *search_function) #ifdef Py_GIL_DISABLED PyMutex_Lock(&interp->codecs.search_path_mutex); #endif - int ret = PyList_Append(search_path, search_function); + int ret = PyList_Append(interp->codecs.search_path, search_function); #ifdef Py_GIL_DISABLED PyMutex_Unlock(&interp->codecs.search_path_mutex); #endif @@ -66,8 +60,9 @@ int PyCodec_Unregister(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - /* Do nothing if codec data structures are not created yet. */ - if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 0) { + if (interp->codecs.initialized != 1) { + /* Do nothing if codecs state was cleared (only possible during + interpreter shutdown). */ return 0; } @@ -154,9 +149,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - return NULL; - } + assert(interp->codecs.initialized); /* Convert the encoding to a normalized Python string: all characters are converted to lower case, spaces and hyphens are @@ -623,9 +616,7 @@ PyObject *_PyCodec_DecodeText(PyObject *object, int PyCodec_RegisterError(const char *name, PyObject *error) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - return -1; - } + assert(interp->codecs.initialized); if (!PyCallable_Check(error)) { PyErr_SetString(PyExc_TypeError, "handler must be callable"); return -1; @@ -640,9 +631,7 @@ int PyCodec_RegisterError(const char *name, PyObject *error) PyObject *PyCodec_LookupError(const char *name) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - return NULL; - } + assert(interp->codecs.initialized); if (name==NULL) name = "strict"; @@ -1400,7 +1389,7 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateEscapeErrors(exc); } -static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp) +PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) { static struct { const char *name; @@ -1488,74 +1477,42 @@ static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp) } }; - if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 1) { - return 0; + assert(interp->codecs.initialized == 0); + interp->codecs.search_path = PyList_New(0); + if (interp->codecs.search_path == NULL) { + return PyStatus_NoMemory(); } - - PyObject *search_path = NULL, *search_cache = NULL, *error_registry = NULL; - search_path = PyList_New(0); - if (search_path == NULL) { - goto error; + interp->codecs.search_cache = PyDict_New(); + if (interp->codecs.search_cache == NULL) { + return PyStatus_NoMemory(); } - search_cache = PyDict_New(); - if (search_cache == NULL) { - goto error; - } - error_registry = PyDict_New(); - if (error_registry == NULL) { - goto error; + interp->codecs.error_registry = PyDict_New(); + if (interp->codecs.error_registry == NULL) { + return PyStatus_NoMemory(); } for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); if (func == NULL) { - goto error; + return PyStatus_NoMemory(); } - int res = PyDict_SetItemString(error_registry, methods[i].name, func); + int res = PyDict_SetItemString(interp->codecs.error_registry, + methods[i].name, func); Py_DECREF(func); if (res < 0) { - goto error; + return PyStatus_Error("Failed to insert into codec error registry"); } } -#ifdef Py_GIL_DISABLED - PyMutex_Lock(&interp->codecs.init_mutex); -#endif - int do_import = 1; - if (interp->codecs.initialized == 0) { - interp->codecs.search_path = search_path; - interp->codecs.search_cache = search_cache; - interp->codecs.error_registry = error_registry; - FT_ATOMIC_STORE_INT_RELEASE(interp->codecs.initialized, 1); - } else { - // Another thread initialized everything while we were preparing. - Py_DECREF(search_path); - Py_DECREF(search_cache); - Py_DECREF(error_registry); - do_import = 0; - } - - // Importing `encodings' can execute arbitrary code and will call back into - // this module to register codec search functions. Do it once everything is - // initialized and we hold no locks. Other Python code may register other - // codecs before `encodings' is finished importing; this is true with or - // without the GIL. -#ifdef Py_GIL_DISABLED - PyMutex_Unlock(&interp->codecs.init_mutex); -#endif + interp->codecs.initialized = 1; - if (do_import) { - PyObject *mod = PyImport_ImportModule("encodings"); - if (mod == NULL) { - return -1; - } - Py_DECREF(mod); + // Importing `encodings' will call back into this module to register codec + // search functions, so this is done after everything else is initialized. + PyObject *mod = PyImport_ImportModule("encodings"); + if (mod == NULL) { + return PyStatus_Error("Failed to import encodings module"); } - return 0; + Py_DECREF(mod); - error: - Py_XDECREF(search_path); - Py_XDECREF(search_cache); - Py_XDECREF(error_registry); - return -1; + return PyStatus_Ok(); } diff --git a/Python/pystate.c b/Python/pystate.c index dcc8cf40c8ac4d..b438332cb38631 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -824,6 +824,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->codecs.search_path); Py_CLEAR(interp->codecs.search_cache); Py_CLEAR(interp->codecs.error_registry); + interp->codecs.initialized = 0; assert(interp->imports.modules == NULL); assert(interp->imports.modules_by_index == NULL); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 7463eeffc1378b..11d91b01b8f956 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -343,7 +343,7 @@ Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - Python/codecs.c - ucnhash_capi - -Python/codecs.c _PyCodecRegistry_EnsureInit methods - +Python/codecs.c _PyCodec_InitRegistry methods - Python/compile.c - NO_LABEL - Python/compile.c - NO_LOCATION - Python/dynload_shlib.c - _PyImport_DynLoadFiletab - From 4d5bffc741c6c501fd11cd418b68b5c35ce8ef56 Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Mon, 15 Apr 2024 11:19:08 -0700 Subject: [PATCH 4/4] Final review comments, and add missing newline after _PyCodec_InitRegistry's return type --- Include/internal/pycore_codecs.h | 10 +++++++--- Objects/object.c | 1 + Python/codecs.c | 23 +++++++++++------------ Python/pystate.c | 5 +---- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index dd4e175cf5eeda..5e2d5c5ce9d868 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -10,11 +10,15 @@ extern "C" { #include "pycore_lock.h" // PyMutex -/* Initialize codecs-related state for the given interpreter. Must be called - before any other _PyCodec* functions, and while only one thread is - active. */ +/* Initialize codecs-related state for the given interpreter, including + registering the first codec search function. Must be called before any other + PyCodec-related functions, and while only one thread is active. */ extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp); +/* Finalize codecs-related state for the given interpreter. No PyCodec-related + functions other than PyCodec_Unregister() may be called after this. */ +extern void _PyCodec_Fini(PyInterpreterState *interp); + extern PyObject* _PyCodec_Lookup(const char *encoding); /* Text codec specific encoding and decoding API. diff --git a/Objects/object.c b/Objects/object.c index 4c1676d77c471e..016d0e1ded92d8 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -473,6 +473,7 @@ void PyObject_CallFinalizer(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); + if (tp->tp_finalize == NULL) return; /* tp_finalize should only be called once. */ diff --git a/Python/codecs.c b/Python/codecs.c index a02e20f4683452..bed245366f9234 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -20,17 +20,6 @@ const char *Py_hexdigits = "0123456789abcdef"; /* --- Codec Registry ----------------------------------------------------- */ -/* Import the standard encodings package which will register the first - codec search function. - - This is done in a lazy way so that the Unicode implementation does - not downgrade startup time of scripts not needing it. - - ImportErrors are silently ignored by this function. Only one try is - made. - -*/ - int PyCodec_Register(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -1389,7 +1378,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateEscapeErrors(exc); } -PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) +PyStatus +_PyCodec_InitRegistry(PyInterpreterState *interp) { static struct { const char *name; @@ -1516,3 +1506,12 @@ PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) return PyStatus_Ok(); } + +void +_PyCodec_Fini(PyInterpreterState *interp) +{ + Py_CLEAR(interp->codecs.search_path); + Py_CLEAR(interp->codecs.search_cache); + Py_CLEAR(interp->codecs.error_registry); + interp->codecs.initialized = 0; +} diff --git a/Python/pystate.c b/Python/pystate.c index b1c83cf2daa5be..811d192820bdc8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -838,10 +838,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } PyConfig_Clear(&interp->config); - Py_CLEAR(interp->codecs.search_path); - Py_CLEAR(interp->codecs.search_cache); - Py_CLEAR(interp->codecs.error_registry); - interp->codecs.initialized = 0; + _PyCodec_Fini(interp); assert(interp->imports.modules == NULL); assert(interp->imports.modules_by_index == NULL); pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy