From 1fc114930ec782ca7058259b80594d2aa8613c01 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 29 Apr 2024 18:22:05 -0600 Subject: [PATCH 01/23] Store the cached def in a wrapping struct rather than directly. --- Python/import.c | 61 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/Python/import.c b/Python/import.c index f120a3841b495d..7e8622ca7e61b5 100644 --- a/Python/import.c +++ b/Python/import.c @@ -940,6 +940,11 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +struct extensions_cache_value { + PyModuleDef *def; +// PyModuleDef _def; +}; + static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { @@ -984,6 +989,18 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } +static void +hashtable_destroy_value(void *value) +{ + if (value != NULL) { + struct extensions_cache_value *v + = (struct extensions_cache_value *)value; + /* There's no need to decref the def since it's immortal. */ + assert(v->def == NULL || _Py_IsImmortal(v->def)); + PyMem_RawFree(value); + } +} + #define HTSEP ':' static int @@ -994,8 +1011,7 @@ _extensions_cache_init(void) hashtable_hash_str, hashtable_compare_str, hashtable_destroy_str, // key - /* There's no need to decref the def since it's immortal. */ - NULL, // value + hashtable_destroy_value, // value &alloc ); if (EXTENSIONS.hashtable == NULL) { @@ -1027,10 +1043,10 @@ _extensions_cache_find_unlocked(PyObject *path, PyObject *name, return entry; } -static PyModuleDef * +static struct extensions_cache_value * _extensions_cache_get(PyObject *path, PyObject *name) { - PyModuleDef *def = NULL; + struct extensions_cache_value *value = NULL; extensions_lock_acquire(); _Py_hashtable_entry_t *entry = @@ -1039,11 +1055,11 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* It was never added. */ goto finally; } - def = (PyModuleDef *)entry->value; + value = (struct extensions_cache_value *)entry->value; finally: extensions_lock_release(); - return def; + return value; } static int @@ -1051,6 +1067,15 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) { int res = -1; assert(def != NULL); + + struct extensions_cache_value *value + = PyMem_RawMalloc(sizeof(struct extensions_cache_value)); + if (value == NULL) { + PyErr_NoMemory(); + return -1; + } + value->def = def; + extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1065,7 +1090,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) _extensions_cache_find_unlocked(path, name, &key); if (entry == NULL) { /* It was never added. */ - if (_Py_hashtable_set(EXTENSIONS.hashtable, key, def) < 0) { + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, value) < 0) { PyErr_NoMemory(); goto finally; } @@ -1074,11 +1099,11 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) } else if (entry->value == NULL) { /* It was previously deleted. */ - entry->value = def; + entry->value = value; } else { /* We expect it to be static, so it must be the same pointer. */ - assert((PyModuleDef *)entry->value == def); + assert(((struct extensions_cache_value *)entry->value)->def == def); /* It was already added. */ already_set = 1; } @@ -1122,7 +1147,7 @@ _extensions_cache_delete(PyObject *path, PyObject *name) However, this decref would be problematic if the module def were dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ - assert(_Py_IsImmortal(entry->value)); + hashtable_destroy_value(entry->value); entry->value = NULL; finally: @@ -1338,8 +1363,9 @@ update_global_state_for_extension(PyThreadState *tstate, // XXX Why special-case the main interpreter? if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { #ifndef NDEBUG - PyModuleDef *cached = _extensions_cache_get(path, name); - assert(cached == NULL || cached == def); + struct extensions_cache_value *cached + = _extensions_cache_get(path, name); + assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set(path, name, def) < 0) { return -1; @@ -1471,10 +1497,12 @@ import_find_extension(PyThreadState *tstate, struct _Py_ext_module_loader_info *info) { /* Only single-phase init modules will be in the cache. */ - PyModuleDef *def = _extensions_cache_get(info->path, info->name); - if (def == NULL) { + struct extensions_cache_value *value + = _extensions_cache_get(info->path, info->name); + if (value == NULL) { return NULL; } + PyModuleDef *def = value->def; assert_singlephase(def); /* It may have been successfully imported previously @@ -1594,13 +1622,14 @@ static int clear_singlephase_extension(PyInterpreterState *interp, PyObject *name, PyObject *path) { - PyModuleDef *def = _extensions_cache_get(path, name); - if (def == NULL) { + struct extensions_cache_value *value = _extensions_cache_get(path, name); + if (value == NULL) { if (PyErr_Occurred()) { return -1; } return 0; } + PyModuleDef *def = value->def; /* Clear data set when the module was initially loaded. */ def->m_base.m_init = NULL; From 254e1f1eece9b6765fccb6012b111fabc59fe7c3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 18:15:36 -0600 Subject: [PATCH 02/23] Pass the cached value around where appropriate. --- Python/import.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/Python/import.c b/Python/import.c index 7e8622ca7e61b5..ebeaacaf3052fc 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1400,9 +1400,12 @@ finish_singlephase_extension(PyThreadState *tstate, static PyObject * -reload_singlephase_extension(PyThreadState *tstate, PyModuleDef *def, +reload_singlephase_extension(PyThreadState *tstate, + struct extensions_cache_value *cached, struct _Py_ext_module_loader_info *info) { + PyModuleDef *def = cached->def; + assert(def != NULL); assert_singlephase(def); PyObject *mod = NULL; @@ -1497,13 +1500,12 @@ import_find_extension(PyThreadState *tstate, struct _Py_ext_module_loader_info *info) { /* Only single-phase init modules will be in the cache. */ - struct extensions_cache_value *value + struct extensions_cache_value *cached = _extensions_cache_get(info->path, info->name); - if (value == NULL) { + if (cached == NULL) { return NULL; } - PyModuleDef *def = value->def; - assert_singlephase(def); + assert_singlephase(cached->def); /* It may have been successfully imported previously in an interpreter that allows legacy modules @@ -1514,7 +1516,7 @@ import_find_extension(PyThreadState *tstate, return NULL; } - PyObject *mod = reload_singlephase_extension(tstate, def, info); + PyObject *mod = reload_singlephase_extension(tstate, cached, info); if (mod == NULL) { return NULL; } @@ -1622,14 +1624,14 @@ static int clear_singlephase_extension(PyInterpreterState *interp, PyObject *name, PyObject *path) { - struct extensions_cache_value *value = _extensions_cache_get(path, name); - if (value == NULL) { + struct extensions_cache_value *cached = _extensions_cache_get(path, name); + if (cached == NULL) { if (PyErr_Occurred()) { return -1; } return 0; } - PyModuleDef *def = value->def; + PyModuleDef *def = cached->def; /* Clear data set when the module was initially loaded. */ def->m_base.m_init = NULL; @@ -1732,7 +1734,12 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) PyObject *mod = import_find_extension(tstate, &info); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); - assert_singlephase(_PyModule_GetDef(mod)); +#ifndef NDEBUG + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); +#endif + assert(cached->def == _PyModule_GetDef(mod)); + assert_singlephase(cached->def); goto finally; } else if (_PyErr_Occurred(tstate)) { @@ -4086,7 +4093,13 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) mod = import_find_extension(tstate, &info); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); - assert_singlephase(_PyModule_GetDef(mod)); +#ifndef NDEBUG + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); +#endif + assert(_PyModule_GetDef(mod) == NULL + || cached->def == _PyModule_GetDef(mod)); + assert_singlephase(cached->def); goto finally; } else if (_PyErr_Occurred(tstate)) { From 06943187adbb0203c0f8dd24829e092a93164914 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 13:16:17 -0600 Subject: [PATCH 03/23] Pass the cached value to check_singlephase(). --- Python/import.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/Python/import.c b/Python/import.c index ebeaacaf3052fc..fefb9c9743e668 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1277,13 +1277,16 @@ _get_extension_kind(PyModuleDef *def, bool check_size) || kind == _Py_ext_module_kind_UNKNOWN); \ } while (0) -#define assert_singlephase(def) \ - assert_singlephase_def(def) +#define assert_singlephase(cached) \ + do { \ + _Py_ext_module_kind kind = _get_extension_kind(cached->def, true); \ + assert(kind == _Py_ext_module_kind_SINGLEPHASE); \ + } while (0) #else /* defined(NDEBUG) */ #define assert_multiphase_def(def) #define assert_singlephase_def(def) -#define assert_singlephase(def) +#define assert_singlephase(cached) #endif @@ -1406,7 +1409,7 @@ reload_singlephase_extension(PyThreadState *tstate, { PyModuleDef *def = cached->def; assert(def != NULL); - assert_singlephase(def); + assert_singlephase(cached); PyObject *mod = NULL; /* It may have been successfully imported previously @@ -1505,7 +1508,8 @@ import_find_extension(PyThreadState *tstate, if (cached == NULL) { return NULL; } - assert_singlephase(cached->def); + assert(cached->def != NULL); + assert_singlephase(cached); /* It may have been successfully imported previously in an interpreter that allows legacy modules @@ -1736,10 +1740,10 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) assert(!_PyErr_Occurred(tstate)); #ifndef NDEBUG struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); + = _extensions_cache_get(info.path, info.name); #endif assert(cached->def == _PyModule_GetDef(mod)); - assert_singlephase(cached->def); + assert_singlephase(cached); goto finally; } else if (_PyErr_Occurred(tstate)) { @@ -4095,11 +4099,11 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) assert(!_PyErr_Occurred(tstate)); #ifndef NDEBUG struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); + = _extensions_cache_get(info.path, info.name); #endif assert(_PyModule_GetDef(mod) == NULL || cached->def == _PyModule_GetDef(mod)); - assert_singlephase(cached->def); + assert_singlephase(cached); goto finally; } else if (_PyErr_Occurred(tstate)) { From 9ca330bacf0690305b970b3399edf68d553905c8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 16:45:48 -0600 Subject: [PATCH 04/23] Add extensions_cache_value helpers. --- Python/import.c | 136 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 40 deletions(-) diff --git a/Python/import.c b/Python/import.c index fefb9c9743e668..f6aa438b8889c0 100644 --- a/Python/import.c +++ b/Python/import.c @@ -924,6 +924,7 @@ extensions_lock_release(void) PyMutex_Unlock(&_PyRuntime.imports.extensions.mutex); } + /* Magic for extension modules (built-in as well as dynamically loaded). To prevent initializing an extension module more than once, we keep a static dictionary 'extensions' keyed by the tuple @@ -940,11 +941,69 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +typedef PyDictObject *cached_m_dict_t; + struct extensions_cache_value { PyModuleDef *def; -// PyModuleDef _def; }; +static struct extensions_cache_value * +alloc_extensions_cache_value(void) +{ + struct extensions_cache_value *value + = PyMem_RawMalloc(sizeof(struct extensions_cache_value)); + if (value == NULL) { + PyErr_NoMemory(); + return NULL; + } + *value = (struct extensions_cache_value){0}; + return value; +} + +static void +free_extensions_cache_value(struct extensions_cache_value *value) +{ + PyMem_RawFree(value); +} + +static int +update_extensions_cache_value(struct extensions_cache_value *value, + PyModuleDef *def) +{ + assert(def != NULL); + /* We expect it to be static, so it must be the same pointer. */ + assert(value->def == NULL || value->def == def); + + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortalUntracked((PyObject *)def); + + *value = (struct extensions_cache_value){ + .def=def, + }; + return 0; +} + +static void +clear_extensions_cache_value(struct extensions_cache_value *value) +{ + assert(value != NULL); + /* If we hadn't made the stored defs immortal, we would decref here. + However, this decref would be problematic if the module def were + dynamically allocated, it were the last ref, and this function + were called with an interpreter other than the def's owner. */ + assert(value->def == NULL || _Py_IsImmortal(value->def)); +} + +static void +del_extensions_cache_value(struct extensions_cache_value *value) +{ + if (value != NULL) { + clear_extensions_cache_value(value); + free_extensions_cache_value(value); + } +} + static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { @@ -958,6 +1017,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; + // XXX Use a buffer if it's a temp value (every case but "set"). char *key = PyMem_RawMalloc(size); if (key == NULL) { PyErr_NoMemory(); @@ -989,18 +1049,6 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } -static void -hashtable_destroy_value(void *value) -{ - if (value != NULL) { - struct extensions_cache_value *v - = (struct extensions_cache_value *)value; - /* There's no need to decref the def since it's immortal. */ - assert(v->def == NULL || _Py_IsImmortal(v->def)); - PyMem_RawFree(value); - } -} - #define HTSEP ':' static int @@ -1011,7 +1059,7 @@ _extensions_cache_init(void) hashtable_hash_str, hashtable_compare_str, hashtable_destroy_str, // key - hashtable_destroy_value, // value + (_Py_hashtable_destroy_func)del_extensions_cache_value, // value &alloc ); if (EXTENSIONS.hashtable == NULL) { @@ -1043,6 +1091,7 @@ _extensions_cache_find_unlocked(PyObject *path, PyObject *name, return entry; } +/* This can only fail with "out of memory". */ static struct extensions_cache_value * _extensions_cache_get(PyObject *path, PyObject *name) { @@ -1062,19 +1111,20 @@ _extensions_cache_get(PyObject *path, PyObject *name) return value; } +/* This can only fail with "out of memory". */ static int _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) { int res = -1; assert(def != NULL); + void *key = NULL; + struct extensions_cache_value *value = NULL; + struct extensions_cache_value *newvalue = NULL; - struct extensions_cache_value *value - = PyMem_RawMalloc(sizeof(struct extensions_cache_value)); - if (value == NULL) { - PyErr_NoMemory(); + struct extensions_cache_value updates = {0}; + if (update_extensions_cache_value(&updates, def) < 0) { return -1; } - value->def = def; extensions_lock_acquire(); @@ -1084,35 +1134,35 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) } } - int already_set = 0; - void *key = NULL; _Py_hashtable_entry_t *entry = _extensions_cache_find_unlocked(path, name, &key); if (entry == NULL) { /* It was never added. */ - if (_Py_hashtable_set(EXTENSIONS.hashtable, key, value) < 0) { + newvalue = alloc_extensions_cache_value(); + if (newvalue == NULL) { + goto finally; + } + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, newvalue) < 0) { PyErr_NoMemory(); goto finally; } + value = newvalue; /* The hashtable owns the key now. */ key = NULL; } - else if (entry->value == NULL) { - /* It was previously deleted. */ - entry->value = value; - } else { - /* We expect it to be static, so it must be the same pointer. */ - assert(((struct extensions_cache_value *)entry->value)->def == def); - /* It was already added. */ - already_set = 1; + value = (struct extensions_cache_value *)entry->value; + if (value == NULL) { + /* It was previously deleted. */ + newvalue = alloc_extensions_cache_value(); + if (newvalue == NULL) { + goto finally; + } + value = newvalue; + entry->value = value; + } } - if (!already_set) { - /* We assume that all module defs are statically allocated - and will never be freed. Otherwise, we would incref here. */ - _Py_SetImmortal((PyObject *)def); - } res = 0; finally: @@ -1120,6 +1170,14 @@ _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) if (key != NULL) { hashtable_destroy_str(key); } + + if (value != NULL) { + *value = updates; + } + else if (newvalue != NULL) { + del_extensions_cache_value(newvalue); + } + return res; } @@ -1143,13 +1201,11 @@ _extensions_cache_delete(PyObject *path, PyObject *name) /* It was already removed. */ goto finally; } - /* If we hadn't made the stored defs immortal, we would decref here. - However, this decref would be problematic if the module def were - dynamically allocated, it were the last ref, and this function - were called with an interpreter other than the def's owner. */ - hashtable_destroy_value(entry->value); + struct extensions_cache_value *value = entry->value; entry->value = NULL; + del_extensions_cache_value(value); + finally: extensions_lock_release(); } From 7e0c4ee8d1791958c8d04297d36be8dfb2bea68c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 17:00:29 -0600 Subject: [PATCH 05/23] Add get_cached_m_dict(). --- Python/import.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/Python/import.c b/Python/import.c index f6aa438b8889c0..7e381907fef928 100644 --- a/Python/import.c +++ b/Python/import.c @@ -966,6 +966,15 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static PyObject * +get_cached_m_dict(struct extensions_cache_value *value) +{ + assert(value != NULL); + assert(value->def != NULL); + Py_XINCREF(value->def->m_base.m_copy); + return value->def->m_base.m_copy; +} + static int update_extensions_cache_value(struct extensions_cache_value *value, PyModuleDef *def) @@ -1261,11 +1270,11 @@ get_core_module_dict(PyInterpreterState *interp, if (path == name) { assert(!PyErr_Occurred()); if (PyUnicode_CompareWithASCIIString(name, "sys") == 0) { - return interp->sysdict_copy; + return Py_NewRef(interp->sysdict_copy); } assert(!PyErr_Occurred()); if (PyUnicode_CompareWithASCIIString(name, "builtins") == 0) { - return interp->builtins_copy; + return Py_NewRef(interp->builtins_copy); } assert(!PyErr_Occurred()); } @@ -1466,6 +1475,7 @@ reload_singlephase_extension(PyThreadState *tstate, PyModuleDef *def = cached->def; assert(def != NULL); assert_singlephase(cached); + PyModInitFunction m_init = def->m_base.m_init; PyObject *mod = NULL; /* It may have been successfully imported previously @@ -1479,8 +1489,13 @@ reload_singlephase_extension(PyThreadState *tstate, PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { - PyObject *m_copy = def->m_base.m_copy; /* Module does not support repeated initialization */ + assert(m_init == NULL); + assert(def->m_base.m_init == NULL); + // XXX Copying the cached dict may break interpreter isolation. + // We could solve this by temporarily acquiring the original + // interpreter's GIL. + PyObject *m_copy = get_cached_m_dict(cached); if (m_copy == NULL) { /* It might be a core module (e.g. sys & builtins), for which we don't set m_copy. */ @@ -1493,14 +1508,18 @@ reload_singlephase_extension(PyThreadState *tstate, } mod = import_add_module(tstate, info->name); if (mod == NULL) { + Py_DECREF(m_copy); return NULL; } PyObject *mdict = PyModule_GetDict(mod); if (mdict == NULL) { + Py_DECREF(m_copy); Py_DECREF(mod); return NULL; } - if (PyDict_Update(mdict, m_copy)) { + int rc = PyDict_Update(mdict, m_copy); + Py_DECREF(m_copy); + if (rc < 0) { Py_DECREF(mod); return NULL; } From c1c1379c6811fbd7f57b4a821df2252b5fb23cfb Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:30:35 -0600 Subject: [PATCH 06/23] Track ext module origin. --- Include/internal/pycore_importdl.h | 12 +++++++++++- Python/import.c | 23 ++++++++++++++++++----- Python/importdl.c | 24 +++++++++++++++++++++--- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h index b0af28da34e087..e5f222b371a113 100644 --- a/Include/internal/pycore_importdl.h +++ b/Include/internal/pycore_importdl.h @@ -22,6 +22,11 @@ typedef enum ext_module_kind { _Py_ext_module_kind_INVALID = 3, } _Py_ext_module_kind; +typedef enum ext_module_origin { + _Py_ext_module_origin_CORE = 1, + _Py_ext_module_origin_BUILTIN = 2, + _Py_ext_module_origin_DYNAMIC = 3, +} _Py_ext_module_origin; /* Input for loading an extension module. */ struct _Py_ext_module_loader_info { @@ -34,6 +39,7 @@ struct _Py_ext_module_loader_info { /* path is always a borrowed ref of name or filename, * depending on if it's builtin or not. */ PyObject *path; + _Py_ext_module_origin origin; const char *hook_prefix; const char *newcontext; }; @@ -42,7 +48,11 @@ extern void _Py_ext_module_loader_info_clear( extern int _Py_ext_module_loader_info_init( struct _Py_ext_module_loader_info *info, PyObject *name, - PyObject *filename); + PyObject *filename, + _Py_ext_module_origin origin); +extern int _Py_ext_module_loader_info_init_for_core( + struct _Py_ext_module_loader_info *p_info, + PyObject *name); extern int _Py_ext_module_loader_info_init_for_builtin( struct _Py_ext_module_loader_info *p_info, PyObject *name); diff --git a/Python/import.c b/Python/import.c index 7e381907fef928..9dddc9259dd3f4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -945,6 +945,8 @@ typedef PyDictObject *cached_m_dict_t; struct extensions_cache_value { PyModuleDef *def; + + _Py_ext_module_origin origin; }; static struct extensions_cache_value * @@ -977,7 +979,7 @@ get_cached_m_dict(struct extensions_cache_value *value) static int update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def) + PyModuleDef *def, _Py_ext_module_origin origin) { assert(def != NULL); /* We expect it to be static, so it must be the same pointer. */ @@ -989,6 +991,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, *value = (struct extensions_cache_value){ .def=def, + .origin=origin, }; return 0; } @@ -1122,16 +1125,18 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* This can only fail with "out of memory". */ static int -_extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def) +_extensions_cache_set(PyObject *path, PyObject *name, + PyModuleDef *def,_Py_ext_module_origin origin) { int res = -1; assert(def != NULL); + assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); void *key = NULL; struct extensions_cache_value *value = NULL; struct extensions_cache_value *newvalue = NULL; struct extensions_cache_value updates = {0}; - if (update_extensions_cache_value(&updates, def) < 0) { + if (update_extensions_cache_value(&updates, def, origin) < 0) { return -1; } @@ -1358,6 +1363,7 @@ _get_extension_kind(PyModuleDef *def, bool check_size) struct singlephase_global_update { PyObject *m_dict; PyModInitFunction m_init; + _Py_ext_module_origin origin; }; static int @@ -1435,7 +1441,11 @@ update_global_state_for_extension(PyThreadState *tstate, = _extensions_cache_get(path, name); assert(cached == NULL || cached->def == def); #endif - if (_extensions_cache_set(path, name, def) < 0) { + if (_extensions_cache_set( + path, name, def, singlephase->origin) < 0) + { + // XXX Ignore this error? Doing so would effectively + // mark the module as not loadable. return -1; } } @@ -1660,7 +1670,9 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, } /* Update global import state. */ - struct singlephase_global_update singlephase = {0}; + struct singlephase_global_update singlephase = { + .origin=info->origin, + }; // gh-88216: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. if (def->m_size == -1) { @@ -1766,6 +1778,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, struct singlephase_global_update singlephase = { /* We don't want def->m_base.m_copy populated. */ .m_dict=NULL, + .origin=_Py_ext_module_origin_CORE, }; if (update_global_state_for_extension( tstate, nameobj, nameobj, def, &singlephase) < 0) diff --git a/Python/importdl.c b/Python/importdl.c index a3091946bc94bf..38f56db4b4cc96 100644 --- a/Python/importdl.c +++ b/Python/importdl.c @@ -111,9 +111,12 @@ _Py_ext_module_loader_info_clear(struct _Py_ext_module_loader_info *info) int _Py_ext_module_loader_info_init(struct _Py_ext_module_loader_info *p_info, - PyObject *name, PyObject *filename) + PyObject *name, PyObject *filename, + _Py_ext_module_origin origin) { - struct _Py_ext_module_loader_info info = {0}; + struct _Py_ext_module_loader_info info = { + .origin=origin, + }; assert(name != NULL); if (!PyUnicode_Check(name)) { @@ -183,12 +186,25 @@ _Py_ext_module_loader_info_init_for_builtin( .name_encoded=name_encoded, /* We won't need filename. */ .path=name, + .origin=_Py_ext_module_origin_BUILTIN, .hook_prefix=ascii_only_prefix, .newcontext=NULL, }; return 0; } +int +_Py_ext_module_loader_info_init_for_core( + struct _Py_ext_module_loader_info *info, + PyObject *name) +{ + if (_Py_ext_module_loader_info_init_for_builtin(info, name) < 0) { + return -1; + } + info->origin = _Py_ext_module_origin_CORE; + return 0; +} + int _Py_ext_module_loader_info_init_from_spec( struct _Py_ext_module_loader_info *p_info, @@ -203,7 +219,9 @@ _Py_ext_module_loader_info_init_from_spec( Py_DECREF(name); return -1; } - int err = _Py_ext_module_loader_info_init(p_info, name, filename); + /* We could also accommodate builtin modules here without much trouble. */ + _Py_ext_module_origin origin = _Py_ext_module_origin_DYNAMIC; + int err = _Py_ext_module_loader_info_init(p_info, name, filename, origin); Py_DECREF(name); Py_DECREF(filename); return err; From 8485fb5c24f7cc1b89b2b45e4536d1fab4081466 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:35:51 -0600 Subject: [PATCH 07/23] Look up core module dicts in get_cached_m_dict(). --- Python/import.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/Python/import.c b/Python/import.c index 9dddc9259dd3f4..e22b2eab94d0ce 100644 --- a/Python/import.c +++ b/Python/import.c @@ -968,13 +968,24 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static PyObject * get_core_module_dict( + PyInterpreterState *interp, PyObject *name, PyObject *path); + static PyObject * -get_cached_m_dict(struct extensions_cache_value *value) +get_cached_m_dict(struct extensions_cache_value *value, + PyObject *name, PyObject *path) { assert(value != NULL); + PyInterpreterState *interp = _PyInterpreterState_GET(); + /* It might be a core module (e.g. sys & builtins), + for which we don't cache m_dict. */ + if (value->origin == _Py_ext_module_origin_CORE) { + return get_core_module_dict(interp, name, path); + } assert(value->def != NULL); - Py_XINCREF(value->def->m_base.m_copy); - return value->def->m_base.m_copy; + PyObject *m_dict = value->def->m_base.m_copy; + Py_XINCREF(m_dict); + return m_dict; } static int @@ -1505,16 +1516,10 @@ reload_singlephase_extension(PyThreadState *tstate, // XXX Copying the cached dict may break interpreter isolation. // We could solve this by temporarily acquiring the original // interpreter's GIL. - PyObject *m_copy = get_cached_m_dict(cached); + PyObject *m_copy = get_cached_m_dict(cached, info->name, info->path); if (m_copy == NULL) { - /* It might be a core module (e.g. sys & builtins), - for which we don't set m_copy. */ - m_copy = get_core_module_dict( - tstate->interp, info->name, info->path); - if (m_copy == NULL) { - assert(!PyErr_Occurred()); - return NULL; - } + assert(!PyErr_Occurred()); + return NULL; } mod = import_add_module(tstate, info->name); if (mod == NULL) { From abbddde74a363267409368a143487431572c122c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 18:50:08 -0600 Subject: [PATCH 08/23] Pass m_dict to _extensions_cache_set(). --- Python/import.c | 127 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 89 insertions(+), 38 deletions(-) diff --git a/Python/import.c b/Python/import.c index e22b2eab94d0ce..5529402ca4ae60 100644 --- a/Python/import.c +++ b/Python/import.c @@ -968,6 +968,56 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static int +set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) +{ + assert(value != NULL); + assert(value->def != NULL); + + PyObject *copied = NULL; + if (m_dict != NULL) { + assert(PyDict_Check(m_dict)); + copied = PyDict_Copy(m_dict); + if (copied == NULL) { + /* We expect this can only be "out of memory". */ + return -1; + } + } + + /* XXX gh-88216: The copied dict is owned by the current + * interpreter. That's a problem if the interpreter has + * its own obmalloc state or if the module is successfully + * imported into such an interpreter. If the interpreter + * has its own GIL then there may be data races and + * PyImport_ClearModulesByIndex() can crash. Normally, + * a single-phase init module cannot be imported in an + * isolated interpreter, but there are ways around that. + * Hence, heere be dragons! Ideally we would instead do + * something like make a read-only, immortal copy of the + * dict using PyMem_RawMalloc() and store *that* in m_copy. + * Then we'd need to make sure to clear that when the + * runtime is finalized, rather than in + * PyImport_ClearModulesByIndex(). */ + PyObject *old = value->def->m_base.m_copy; + if (old != NULL) { + // XXX This should really never happen. + Py_DECREF(old); + } + + value->def->m_base.m_copy = copied; + return 0; +} + +static void +del_cached_m_dict(struct extensions_cache_value *value) +{ + assert(value != NULL); + if (value->def != NULL) { + Py_XDECREF((PyObject *)value->def->m_base.m_copy); + value->def->m_base.m_copy = NULL; + } +} + static PyObject * get_core_module_dict( PyInterpreterState *interp, PyObject *name, PyObject *path); @@ -990,23 +1040,44 @@ get_cached_m_dict(struct extensions_cache_value *value, static int update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def, _Py_ext_module_origin origin) + PyModuleDef *def, PyObject *m_dict, + _Py_ext_module_origin origin) { assert(def != NULL); /* We expect it to be static, so it must be the same pointer. */ assert(value->def == NULL || value->def == def); + assert(def->m_base.m_init == NULL || m_dict == NULL); + /* For now we don't worry about comparing value->m_copy. */ + assert(def->m_base.m_copy == NULL || m_dict != NULL); /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. */ _Py_SetImmortalUntracked((PyObject *)def); - *value = (struct extensions_cache_value){ + struct extensions_cache_value temp = { .def=def, .origin=origin, }; + if (set_cached_m_dict(&temp, m_dict) < 0) { + return -1; + } + + *value = temp; return 0; } +static void +copy_extensions_cache_value(struct extensions_cache_value *value, + struct extensions_cache_value *updates) +{ + if (value->def != NULL + && value->def->m_base.m_copy != updates->def->m_base.m_copy) + { + del_cached_m_dict(value); + } + *value = *updates; +} + static void clear_extensions_cache_value(struct extensions_cache_value *value) { @@ -1016,6 +1087,7 @@ clear_extensions_cache_value(struct extensions_cache_value *value) dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ assert(value->def == NULL || _Py_IsImmortal(value->def)); + del_cached_m_dict(value); } static void @@ -1137,7 +1209,8 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* This can only fail with "out of memory". */ static int _extensions_cache_set(PyObject *path, PyObject *name, - PyModuleDef *def,_Py_ext_module_origin origin) + PyModuleDef *def, PyObject *m_dict, + _Py_ext_module_origin origin) { int res = -1; assert(def != NULL); @@ -1147,7 +1220,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, struct extensions_cache_value *newvalue = NULL; struct extensions_cache_value updates = {0}; - if (update_extensions_cache_value(&updates, def, origin) < 0) { + if (update_extensions_cache_value(&updates, def, m_dict, origin) < 0) { return -1; } @@ -1191,18 +1264,18 @@ _extensions_cache_set(PyObject *path, PyObject *name, res = 0; finally: - extensions_lock_release(); - if (key != NULL) { - hashtable_destroy_str(key); - } - if (value != NULL) { - *value = updates; + copy_extensions_cache_value(value, &updates); } else if (newvalue != NULL) { del_extensions_cache_value(newvalue); } + extensions_lock_release(); + if (key != NULL) { + hashtable_destroy_str(key); + } + return res; } @@ -1383,6 +1456,8 @@ update_global_state_for_extension(PyThreadState *tstate, PyModuleDef *def, struct singlephase_global_update *singlephase) { + PyObject *m_dict = NULL; + /* Copy the module's __dict__, if applicable. */ if (singlephase == NULL) { assert(def->m_base.m_copy == NULL); @@ -1408,39 +1483,15 @@ update_global_state_for_extension(PyThreadState *tstate, assert(def->m_base.m_init == NULL); } else { - assert(PyDict_Check(singlephase->m_dict)); + assert(singlephase->m_dict != NULL + && PyDict_Check(singlephase->m_dict)); // gh-88216: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. assert(def->m_size == -1); assert(!is_core_module(tstate->interp, name, path)); assert(PyUnicode_CompareWithASCIIString(name, "sys") != 0); assert(PyUnicode_CompareWithASCIIString(name, "builtins") != 0); - /* XXX gh-88216: The copied dict is owned by the current - * interpreter. That's a problem if the interpreter has - * its own obmalloc state or if the module is successfully - * imported into such an interpreter. If the interpreter - * has its own GIL then there may be data races and - * PyImport_ClearModulesByIndex() can crash. Normally, - * a single-phase init module cannot be imported in an - * isolated interpreter, but there are ways around that. - * Hence, heere be dragons! Ideally we would instead do - * something like make a read-only, immortal copy of the - * dict using PyMem_RawMalloc() and store *that* in m_copy. - * Then we'd need to make sure to clear that when the - * runtime is finalized, rather than in - * PyImport_ClearModulesByIndex(). */ - if (def->m_base.m_copy) { - /* Somebody already imported the module, - likely under a different name. - XXX this should really not happen. */ - Py_CLEAR(def->m_base.m_copy); - } - def->m_base.m_copy = PyDict_Copy(singlephase->m_dict); - if (def->m_base.m_copy == NULL) { - // XXX Ignore this error? Doing so would effectively - // mark the module as not loadable. */ - return -1; - } + m_dict = singlephase->m_dict; } } @@ -1453,7 +1504,7 @@ update_global_state_for_extension(PyThreadState *tstate, assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set( - path, name, def, singlephase->origin) < 0) + path, name, def, m_dict, singlephase->origin) < 0) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. From 12bcd96c79ff60969e0cc64a51cc083d8aaa7bda Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 19:07:24 -0600 Subject: [PATCH 09/23] Pass m_init to _extensions_cache_set(). --- Python/import.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/Python/import.c b/Python/import.c index 5529402ca4ae60..a49433f275b624 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1040,13 +1040,16 @@ get_cached_m_dict(struct extensions_cache_value *value, static int update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def, PyObject *m_dict, - _Py_ext_module_origin origin) + PyModuleDef *def, PyModInitFunction m_init, + PyObject *m_dict, _Py_ext_module_origin origin) { assert(def != NULL); /* We expect it to be static, so it must be the same pointer. */ assert(value->def == NULL || value->def == def); - assert(def->m_base.m_init == NULL || m_dict == NULL); + assert(m_init == NULL || m_dict == NULL); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(def->m_base.m_init == NULL || def->m_base.m_init == m_init); /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); @@ -1058,6 +1061,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, .def=def, .origin=origin, }; + def->m_base.m_init = m_init; if (set_cached_m_dict(&temp, m_dict) < 0) { return -1; } @@ -1209,8 +1213,8 @@ _extensions_cache_get(PyObject *path, PyObject *name) /* This can only fail with "out of memory". */ static int _extensions_cache_set(PyObject *path, PyObject *name, - PyModuleDef *def, PyObject *m_dict, - _Py_ext_module_origin origin) + PyModuleDef *def, PyModInitFunction m_init, + PyObject *m_dict, _Py_ext_module_origin origin) { int res = -1; assert(def != NULL); @@ -1220,7 +1224,9 @@ _extensions_cache_set(PyObject *path, PyObject *name, struct extensions_cache_value *newvalue = NULL; struct extensions_cache_value updates = {0}; - if (update_extensions_cache_value(&updates, def, m_dict, origin) < 0) { + if (update_extensions_cache_value( + &updates, def, m_init, m_dict, origin) < 0) + { return -1; } @@ -1456,10 +1462,12 @@ update_global_state_for_extension(PyThreadState *tstate, PyModuleDef *def, struct singlephase_global_update *singlephase) { + PyModInitFunction m_init = NULL; PyObject *m_dict = NULL; /* Copy the module's __dict__, if applicable. */ if (singlephase == NULL) { + assert(def->m_base.m_init == NULL); assert(def->m_base.m_copy == NULL); } else { @@ -1473,7 +1481,7 @@ update_global_state_for_extension(PyThreadState *tstate, // We should prevent this somehow. The simplest solution // is probably to store m_copy/m_init in the cache along // with the def, rather than within the def. - def->m_base.m_init = singlephase->m_init; + m_init = singlephase->m_init; } else if (singlephase->m_dict == NULL) { /* It must be a core builtin module. */ @@ -1504,7 +1512,7 @@ update_global_state_for_extension(PyThreadState *tstate, assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set( - path, name, def, m_dict, singlephase->origin) < 0) + path, name, def, m_init, m_dict, singlephase->origin) < 0) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. @@ -1547,7 +1555,6 @@ reload_singlephase_extension(PyThreadState *tstate, PyModuleDef *def = cached->def; assert(def != NULL); assert_singlephase(cached); - PyModInitFunction m_init = def->m_base.m_init; PyObject *mod = NULL; /* It may have been successfully imported previously @@ -1562,7 +1569,6 @@ reload_singlephase_extension(PyThreadState *tstate, PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { /* Module does not support repeated initialization */ - assert(m_init == NULL); assert(def->m_base.m_init == NULL); // XXX Copying the cached dict may break interpreter isolation. // We could solve this by temporarily acquiring the original @@ -1598,12 +1604,14 @@ reload_singlephase_extension(PyThreadState *tstate, || _PyModule_GetDef(mod) == def); } else { - if (def->m_base.m_init == NULL) { + assert(def->m_base.m_copy == NULL); + PyModInitFunction p0 = def->m_base.m_init; + if (p0 == NULL) { assert(!PyErr_Occurred()); return NULL; } struct _Py_ext_module_loader_result res; - if (_PyImport_RunModInitFunc(def->m_base.m_init, info, &res) < 0) { + if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { _Py_ext_module_loader_result_apply_error(&res, name_buf); return NULL; } From 2d56adaa7d05572f9e92c0e287052fcbdfb31a9d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 20:13:25 -0600 Subject: [PATCH 10/23] Add some comments. --- Python/import.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/import.c b/Python/import.c index a49433f275b624..f630acd8b35f04 100644 --- a/Python/import.c +++ b/Python/import.c @@ -587,6 +587,8 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) /* cleanup the saved copy of module dicts */ PyModuleDef *md = PyModule_GetDef(m); if (md) { + // XXX Do this more carefully. The dict might be owned + // by another interpreter. Py_CLEAR(md->m_base.m_copy); } } @@ -982,6 +984,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) /* We expect this can only be "out of memory". */ return -1; } + // XXX We may want to make copied immortal. } /* XXX gh-88216: The copied dict is owned by the current From 9c70d5317ada2bbc54b3fd47ca88c2ee1af59752 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 20:23:32 -0600 Subject: [PATCH 11/23] Use del_cached_m_dict() in set_cached_m_dict(). --- Python/import.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Python/import.c b/Python/import.c index f630acd8b35f04..088b02bbd52fa2 100644 --- a/Python/import.c +++ b/Python/import.c @@ -970,6 +970,8 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static void del_cached_m_dict(struct extensions_cache_value *); + static int set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) { @@ -1001,11 +1003,9 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) * Then we'd need to make sure to clear that when the * runtime is finalized, rather than in * PyImport_ClearModulesByIndex(). */ - PyObject *old = value->def->m_base.m_copy; - if (old != NULL) { - // XXX This should really never happen. - Py_DECREF(old); - } + /* Ideally, the cached dict would always be NULL at this point. */ + assert(value->def->m_base.m_copy == NULL || copied != NULL); + del_cached_m_dict(value); value->def->m_base.m_copy = copied; return 0; @@ -1016,7 +1016,7 @@ del_cached_m_dict(struct extensions_cache_value *value) { assert(value != NULL); if (value->def != NULL) { - Py_XDECREF((PyObject *)value->def->m_base.m_copy); + Py_XDECREF(value->def->m_base.m_copy); value->def->m_base.m_copy = NULL; } } @@ -1069,6 +1069,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, return -1; } + del_cached_m_dict(value); *value = temp; return 0; } From 0098057614166c1dd1a3326a3f5b2ef0720e27b0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 Apr 2024 19:32:59 -0600 Subject: [PATCH 12/23] Add m_init and m_dict fields to extensions_cache_value. --- Python/import.c | 86 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 27 deletions(-) diff --git a/Python/import.c b/Python/import.c index 088b02bbd52fa2..a61a7ed3c02fd2 100644 --- a/Python/import.c +++ b/Python/import.c @@ -948,6 +948,18 @@ typedef PyDictObject *cached_m_dict_t; struct extensions_cache_value { PyModuleDef *def; + /* The function used to re-initialize the module. + This is only set for legacy (single-phase init) extension modules + and only used for those that support multiple initializations + (m_size >= 0). + It is set by update_global_state_for_extension(). */ + PyModInitFunction m_init; + /* A copy of the module's __dict__ after the first time it was loaded. + This is only set/used for legacy modules that do not support + multiple initializations. + It is set by update_global_state_for_extension(). */ + cached_m_dict_t m_dict; + _Py_ext_module_origin origin; }; @@ -970,8 +982,6 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } -static void del_cached_m_dict(struct extensions_cache_value *); - static int set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) { @@ -1005,20 +1015,27 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) * PyImport_ClearModulesByIndex(). */ /* Ideally, the cached dict would always be NULL at this point. */ assert(value->def->m_base.m_copy == NULL || copied != NULL); - del_cached_m_dict(value); + assert(value->m_dict == NULL || copied != NULL); + Py_XDECREF(value->def->m_base.m_copy); + Py_XDECREF((PyObject *)value->m_dict); - value->def->m_base.m_copy = copied; + value->def->m_base.m_copy = Py_XNewRef(copied); + value->m_dict = (cached_m_dict_t)copied; return 0; } static void -del_cached_m_dict(struct extensions_cache_value *value) +del_cached_m_dict(struct extensions_cache_value *value, + struct extensions_cache_value *compare) { - assert(value != NULL); - if (value->def != NULL) { + if (compare == NULL && value->def != NULL) { Py_XDECREF(value->def->m_base.m_copy); value->def->m_base.m_copy = NULL; } + if (compare == NULL || compare->m_dict != value->m_dict) { + Py_XDECREF((PyObject *)value->m_dict); + value->m_dict = NULL; + } } static PyObject * get_core_module_dict( @@ -1036,6 +1053,7 @@ get_cached_m_dict(struct extensions_cache_value *value, return get_core_module_dict(interp, name, path); } assert(value->def != NULL); + // XXX Switch to value->m_dict. PyObject *m_dict = value->def->m_base.m_copy; Py_XINCREF(m_dict); return m_dict; @@ -1052,9 +1070,10 @@ update_extensions_cache_value(struct extensions_cache_value *value, assert(m_init == NULL || m_dict == NULL); /* We expect the same symbol to be used and the shared object file * to have remained loaded, so it must be the same pointer. */ - assert(def->m_base.m_init == NULL || def->m_base.m_init == m_init); + assert(value->m_init == NULL || value->m_init == m_init); /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); + assert(value->m_dict == NULL || m_dict != NULL); /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. */ @@ -1062,6 +1081,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, struct extensions_cache_value temp = { .def=def, + .m_init=m_init, .origin=origin, }; def->m_base.m_init = m_init; @@ -1069,7 +1089,14 @@ update_extensions_cache_value(struct extensions_cache_value *value, return -1; } - del_cached_m_dict(value); + if (m_init != NULL) { + assert(temp.m_dict == NULL); + } + else { + assert(temp.m_init == NULL); + } + + del_cached_m_dict(value, NULL); *value = temp; return 0; } @@ -1078,11 +1105,11 @@ static void copy_extensions_cache_value(struct extensions_cache_value *value, struct extensions_cache_value *updates) { - if (value->def != NULL - && value->def->m_base.m_copy != updates->def->m_base.m_copy) - { - del_cached_m_dict(value); - } + assert(updates != value); + assert(value->def == NULL || updates->def == value->def); + // XXX Skipping this (or an extra incref in set_cached_m_dict() + // should not be necessary. + //del_cached_m_dict(value, updates); *value = *updates; } @@ -1095,7 +1122,7 @@ clear_extensions_cache_value(struct extensions_cache_value *value) dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ assert(value->def == NULL || _Py_IsImmortal(value->def)); - del_cached_m_dict(value); + del_cached_m_dict(value, NULL); } static void @@ -1238,7 +1265,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, if (EXTENSIONS.hashtable == NULL) { if (_extensions_cache_init() < 0) { - goto finally; + goto error; } } @@ -1248,11 +1275,11 @@ _extensions_cache_set(PyObject *path, PyObject *name, /* It was never added. */ newvalue = alloc_extensions_cache_value(); if (newvalue == NULL) { - goto finally; + goto error; } if (_Py_hashtable_set(EXTENSIONS.hashtable, key, newvalue) < 0) { PyErr_NoMemory(); - goto finally; + goto error; } value = newvalue; /* The hashtable owns the key now. */ @@ -1264,23 +1291,24 @@ _extensions_cache_set(PyObject *path, PyObject *name, /* It was previously deleted. */ newvalue = alloc_extensions_cache_value(); if (newvalue == NULL) { - goto finally; + goto error; } value = newvalue; entry->value = value; } } + copy_extensions_cache_value(value, &updates); res = 0; + goto finally; -finally: - if (value != NULL) { - copy_extensions_cache_value(value, &updates); - } - else if (newvalue != NULL) { +error: + if (newvalue != NULL) { del_extensions_cache_value(newvalue); } + clear_extensions_cache_value(&updates); +finally: extensions_lock_release(); if (key != NULL) { hashtable_destroy_str(key); @@ -1396,6 +1424,7 @@ is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *path) return 0; } + #ifndef NDEBUG static _Py_ext_module_kind _get_extension_kind(PyModuleDef *def, bool check_size) @@ -1469,7 +1498,7 @@ update_global_state_for_extension(PyThreadState *tstate, PyModInitFunction m_init = NULL; PyObject *m_dict = NULL; - /* Copy the module's __dict__, if applicable. */ + /* Set up for _extensions_cache_set(). */ if (singlephase == NULL) { assert(def->m_base.m_init == NULL); assert(def->m_base.m_copy == NULL); @@ -1495,8 +1524,7 @@ update_global_state_for_extension(PyThreadState *tstate, assert(def->m_base.m_init == NULL); } else { - assert(singlephase->m_dict != NULL - && PyDict_Check(singlephase->m_dict)); + assert(PyDict_Check(singlephase->m_dict)); // gh-88216: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. assert(def->m_size == -1); @@ -1573,6 +1601,7 @@ reload_singlephase_extension(PyThreadState *tstate, PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { /* Module does not support repeated initialization */ + assert(cached->m_init == NULL); assert(def->m_base.m_init == NULL); // XXX Copying the cached dict may break interpreter isolation. // We could solve this by temporarily acquiring the original @@ -1608,7 +1637,9 @@ reload_singlephase_extension(PyThreadState *tstate, || _PyModule_GetDef(mod) == def); } else { + assert(cached->m_dict == NULL); assert(def->m_base.m_copy == NULL); + // XXX Use cached->m_init. PyModInitFunction p0 = def->m_base.m_init; if (p0 == NULL) { assert(!PyErr_Occurred()); @@ -1752,6 +1783,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, else { /* We will reload via the init function. */ assert(def->m_size >= 0); + assert(def->m_base.m_copy == NULL); singlephase.m_init = p0; } if (update_global_state_for_extension( From 57aaba259c00f8fa95ea83f55a40f49301c4782a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:43:31 -0600 Subject: [PATCH 13/23] Track which interpreter was used to create m_dict. --- Python/import.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Python/import.c b/Python/import.c index a61a7ed3c02fd2..4d61776811771b 100644 --- a/Python/import.c +++ b/Python/import.c @@ -959,6 +959,7 @@ struct extensions_cache_value { multiple initializations. It is set by update_global_state_for_extension(). */ cached_m_dict_t m_dict; + PyInterpreterState *m_dict_interp; _Py_ext_module_origin origin; }; @@ -988,8 +989,11 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) assert(value != NULL); assert(value->def != NULL); + PyInterpreterState *interp = NULL; PyObject *copied = NULL; if (m_dict != NULL) { + interp = _PyInterpreterState_GET(); + assert(value->origin != _Py_ext_module_origin_CORE); assert(PyDict_Check(m_dict)); copied = PyDict_Copy(m_dict); if (copied == NULL) { @@ -1021,6 +1025,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) value->def->m_base.m_copy = Py_XNewRef(copied); value->m_dict = (cached_m_dict_t)copied; + value->m_dict_interp = interp; return 0; } @@ -1074,6 +1079,8 @@ update_extensions_cache_value(struct extensions_cache_value *value, /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); assert(value->m_dict == NULL || m_dict != NULL); + assert((value->m_dict == NULL) == (value->m_dict_interp == NULL)); + assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); /* We assume that all module defs are statically allocated and will never be freed. Otherwise, we would incref here. */ From 96932eb8601897a8283535de5cb46fcae3f54c5d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 12:12:26 -0600 Subject: [PATCH 14/23] Track the interpreter ID. --- Python/import.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Python/import.c b/Python/import.c index 4d61776811771b..14e6657266b9b9 100644 --- a/Python/import.c +++ b/Python/import.c @@ -959,10 +959,12 @@ struct extensions_cache_value { multiple initializations. It is set by update_global_state_for_extension(). */ cached_m_dict_t m_dict; - PyInterpreterState *m_dict_interp; + int64_t m_dict_interpid; _Py_ext_module_origin origin; }; +#define EXTENSIONS_CACHE_VALUE_INIT \ + (struct extensions_cache_value){ .m_dict_interpid=-1 } static struct extensions_cache_value * alloc_extensions_cache_value(void) @@ -973,7 +975,7 @@ alloc_extensions_cache_value(void) PyErr_NoMemory(); return NULL; } - *value = (struct extensions_cache_value){0}; + *value = EXTENSIONS_CACHE_VALUE_INIT; return value; } @@ -989,10 +991,9 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) assert(value != NULL); assert(value->def != NULL); - PyInterpreterState *interp = NULL; + int64_t interpid = -1; PyObject *copied = NULL; if (m_dict != NULL) { - interp = _PyInterpreterState_GET(); assert(value->origin != _Py_ext_module_origin_CORE); assert(PyDict_Check(m_dict)); copied = PyDict_Copy(m_dict); @@ -1001,6 +1002,11 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) return -1; } // XXX We may want to make copied immortal. + + value->def->m_base.m_cache_has_m_dict = 1; + + PyInterpreterState *interp = _PyInterpreterState_GET(); + interpid = PyInterpreterState_GetID(interp); } /* XXX gh-88216: The copied dict is owned by the current @@ -1025,7 +1031,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) value->def->m_base.m_copy = Py_XNewRef(copied); value->m_dict = (cached_m_dict_t)copied; - value->m_dict_interp = interp; + value->m_dict_interpid = interpid; return 0; } @@ -1079,7 +1085,7 @@ update_extensions_cache_value(struct extensions_cache_value *value, /* For now we don't worry about comparing value->m_copy. */ assert(def->m_base.m_copy == NULL || m_dict != NULL); assert(value->m_dict == NULL || m_dict != NULL); - assert((value->m_dict == NULL) == (value->m_dict_interp == NULL)); + assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); /* We assume that all module defs are statically allocated @@ -1261,7 +1267,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, struct extensions_cache_value *value = NULL; struct extensions_cache_value *newvalue = NULL; - struct extensions_cache_value updates = {0}; + struct extensions_cache_value updates = EXTENSIONS_CACHE_VALUE_INIT; if (update_extensions_cache_value( &updates, def, m_init, m_dict, origin) < 0) { From a4cb134f828b6075fb943dcbf7a20df6370707b3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 11:35:51 -0600 Subject: [PATCH 15/23] Look up core module dicts in get_cached_m_dict(). --- Python/import.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/import.c b/Python/import.c index 14e6657266b9b9..621a3e3d5492f7 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1001,9 +1001,8 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) /* We expect this can only be "out of memory". */ return -1; } - // XXX We may want to make copied immortal. - value->def->m_base.m_cache_has_m_dict = 1; + // XXX We may want to make copied immortal. PyInterpreterState *interp = _PyInterpreterState_GET(); interpid = PyInterpreterState_GetID(interp); From c4d3be19d23ec788c1a708912fa74a1664500c44 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 1 May 2024 12:12:56 -0600 Subject: [PATCH 16/23] Make sure we only set m_dict for non-isolated interpreters. --- Python/import.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Python/import.c b/Python/import.c index 621a3e3d5492f7..36d86f056f5571 100644 --- a/Python/import.c +++ b/Python/import.c @@ -34,6 +34,17 @@ module _imp #include "clinic/import.c.h" +#ifndef NDEBUG +static bool +is_interpreter_isolated(PyInterpreterState *interp) +{ + return !_Py_IsMainInterpreter(interp) + && !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) + && interp->ceval.own_gil; +} +#endif + + /*******************************/ /* process-global import state */ /*******************************/ @@ -1006,6 +1017,7 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) PyInterpreterState *interp = _PyInterpreterState_GET(); interpid = PyInterpreterState_GetID(interp); + assert(!is_interpreter_isolated(interp)); } /* XXX gh-88216: The copied dict is owned by the current From d87584d44f65b9aeeb98b4113945913026ef429d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 2 May 2024 18:16:42 -0600 Subject: [PATCH 17/23] Simplify. --- Python/import.c | 104 +++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 64 deletions(-) diff --git a/Python/import.c b/Python/import.c index 36d86f056f5571..4d2b5509f9c292 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1081,62 +1081,6 @@ get_cached_m_dict(struct extensions_cache_value *value, return m_dict; } -static int -update_extensions_cache_value(struct extensions_cache_value *value, - PyModuleDef *def, PyModInitFunction m_init, - PyObject *m_dict, _Py_ext_module_origin origin) -{ - assert(def != NULL); - /* We expect it to be static, so it must be the same pointer. */ - assert(value->def == NULL || value->def == def); - assert(m_init == NULL || m_dict == NULL); - /* We expect the same symbol to be used and the shared object file - * to have remained loaded, so it must be the same pointer. */ - assert(value->m_init == NULL || value->m_init == m_init); - /* For now we don't worry about comparing value->m_copy. */ - assert(def->m_base.m_copy == NULL || m_dict != NULL); - assert(value->m_dict == NULL || m_dict != NULL); - assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); - assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - - /* We assume that all module defs are statically allocated - and will never be freed. Otherwise, we would incref here. */ - _Py_SetImmortalUntracked((PyObject *)def); - - struct extensions_cache_value temp = { - .def=def, - .m_init=m_init, - .origin=origin, - }; - def->m_base.m_init = m_init; - if (set_cached_m_dict(&temp, m_dict) < 0) { - return -1; - } - - if (m_init != NULL) { - assert(temp.m_dict == NULL); - } - else { - assert(temp.m_init == NULL); - } - - del_cached_m_dict(value, NULL); - *value = temp; - return 0; -} - -static void -copy_extensions_cache_value(struct extensions_cache_value *value, - struct extensions_cache_value *updates) -{ - assert(updates != value); - assert(value->def == NULL || updates->def == value->def); - // XXX Skipping this (or an extra incref in set_cached_m_dict() - // should not be necessary. - //del_cached_m_dict(value, updates); - *value = *updates; -} - static void clear_extensions_cache_value(struct extensions_cache_value *value) { @@ -1273,18 +1217,35 @@ _extensions_cache_set(PyObject *path, PyObject *name, { int res = -1; assert(def != NULL); + assert(m_init == NULL || m_dict == NULL); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(def->m_base.m_init == NULL || def->m_base.m_init == m_init); + /* For now we don't worry about comparing value->m_copy. */ + assert(def->m_base.m_copy == NULL || m_dict != NULL); assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); - void *key = NULL; - struct extensions_cache_value *value = NULL; - struct extensions_cache_value *newvalue = NULL; + assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - struct extensions_cache_value updates = EXTENSIONS_CACHE_VALUE_INIT; - if (update_extensions_cache_value( - &updates, def, m_init, m_dict, origin) < 0) - { + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortalUntracked((PyObject *)def); + + /* Generate the new cache value data before taking the lock. */ + struct extensions_cache_value updates = { + .def=def, + .m_init=m_init, + .origin=origin, + }; + def->m_base.m_init = m_init; + if (set_cached_m_dict(&updates, m_dict) < 0) { return -1; } + /* Move on to the locked section. */ + void *key = NULL; + struct extensions_cache_value *value = NULL; + struct extensions_cache_value *newvalue = NULL; + extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1293,6 +1254,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, } } + /* Ensure there's a cached value for the module. */ _Py_hashtable_entry_t *entry = _extensions_cache_find_unlocked(path, name, &key); if (entry == NULL) { @@ -1320,9 +1282,23 @@ _extensions_cache_set(PyObject *path, PyObject *name, value = newvalue; entry->value = value; } + else { + /* We expect it to be static, so it must be the same pointer. */ + assert(value->def == def); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(value->m_init == m_init); + assert(value->m_dict == NULL || m_dict != NULL); + assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); + } } - copy_extensions_cache_value(value, &updates); + /* Fill the cache value with the data we generated earlier. */ + // XXX Skipping this (or an extra incref in set_cached_m_dict() + // should not be necessary. + //del_cached_m_dict(value, updates); + *value = updates; + res = 0; goto finally; From 336d1da8c9f0469d90df2f93c8cf32a061ce32c4 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 10:36:32 -0600 Subject: [PATCH 18/23] Fix a comment. --- Python/import.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/import.c b/Python/import.c index 4d2b5509f9c292..9151d9911d54bf 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1630,9 +1630,9 @@ reload_singlephase_extension(PyThreadState *tstate, } /* We can't set mod->md_def if it's missing, * because _PyImport_ClearModulesByIndex() might break - * due to violating interpreter isolation. See the note - * in fix_up_extension_for_interpreter(). Until that - * is solved, we leave md_def set to NULL. */ + * due to violating interpreter isolation. + * See the note in set_cached_m_dict(). + * Until that is solved, we leave md_def set to NULL. */ assert(_PyModule_GetDef(mod) == NULL || _PyModule_GetDef(mod) == def); } From bd1e134bad535db447dbf1647ed2b11f674b6341 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 17:02:46 -0600 Subject: [PATCH 19/23] Separate the def logic from the rest of the extensions cache code. --- Python/import.c | 311 ++++++++++++++++++++++++++++++------------------ 1 file changed, 193 insertions(+), 118 deletions(-) diff --git a/Python/import.c b/Python/import.c index 9151d9911d54bf..c56e85da3465ad 100644 --- a/Python/import.c +++ b/Python/import.c @@ -954,7 +954,12 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ -typedef PyDictObject *cached_m_dict_t; +typedef struct cached_m_dict { + /* A shallow copy of the original module's __dict__. */ + PyObject *copied; + /* The interpreter that owns the copy. */ + int64_t interpid; +} *cached_m_dict_t; struct extensions_cache_value { PyModuleDef *def; @@ -965,17 +970,16 @@ struct extensions_cache_value { (m_size >= 0). It is set by update_global_state_for_extension(). */ PyModInitFunction m_init; + /* A copy of the module's __dict__ after the first time it was loaded. This is only set/used for legacy modules that do not support multiple initializations. - It is set by update_global_state_for_extension(). */ + It is set exclusively by fixup_cached_def(). */ cached_m_dict_t m_dict; - int64_t m_dict_interpid; + struct cached_m_dict _m_dict; _Py_ext_module_origin origin; }; -#define EXTENSIONS_CACHE_VALUE_INIT \ - (struct extensions_cache_value){ .m_dict_interpid=-1 } static struct extensions_cache_value * alloc_extensions_cache_value(void) @@ -986,7 +990,7 @@ alloc_extensions_cache_value(void) PyErr_NoMemory(); return NULL; } - *value = EXTENSIONS_CACHE_VALUE_INIT; + *value = (struct extensions_cache_value){0}; return value; } @@ -996,29 +1000,78 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } -static int -set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) +static void +fixup_cached_def(struct extensions_cache_value *value) +{ + /* For the moment, the values in the def's m_base may belong + * to another module, and we're replacing them here. This can + * cause problems later if the old module is reloaded. + * + * Also, we don't decref any old cached values first when we + * replace them here, in case we need to restore them in the + * near future. Instead, the caller is responsible for wrapping + * this up by calling cleanup_old_cached_def() or + * restore_old_cached_def() if there was an error. */ + PyModuleDef *def = value->def; + assert(def != NULL); + + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortalUntracked((PyObject *)def); + + def->m_base.m_init = value->m_init; + + /* Different modules can share the same def, so we can't just + * expect m_copy to be NULL. */ + assert(def->m_base.m_copy == NULL + || def->m_base.m_init == NULL + || value->m_dict != NULL); + if (value->m_dict != NULL) { + assert(value->m_dict->copied != NULL); + /* As noted above, we don't first decref the old value, if any. */ + def->m_base.m_copy = Py_NewRef(value->m_dict->copied); + } +} + +static void +restore_old_cached_def(PyModuleDef *def, PyModuleDef_Base *oldbase) { - assert(value != NULL); - assert(value->def != NULL); + def->m_base = *oldbase; +} - int64_t interpid = -1; - PyObject *copied = NULL; - if (m_dict != NULL) { - assert(value->origin != _Py_ext_module_origin_CORE); - assert(PyDict_Check(m_dict)); - copied = PyDict_Copy(m_dict); - if (copied == NULL) { - /* We expect this can only be "out of memory". */ - return -1; - } +static void +cleanup_old_cached_def(PyModuleDef_Base *oldbase) +{ + Py_XDECREF(oldbase->m_copy); +} + +static void +del_cached_def(struct extensions_cache_value *value) +{ + /* If we hadn't made the stored defs immortal, we would decref here. + However, this decref would be problematic if the module def were + dynamically allocated, it were the last ref, and this function + were called with an interpreter other than the def's owner. */ + assert(value->def == NULL || _Py_IsImmortal(value->def)); - // XXX We may want to make copied immortal. + Py_XDECREF(value->def->m_base.m_copy); + value->def->m_base.m_copy = NULL; +} - PyInterpreterState *interp = _PyInterpreterState_GET(); - interpid = PyInterpreterState_GetID(interp); - assert(!is_interpreter_isolated(interp)); +static int +init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) +{ + assert(value != NULL); + /* This should only have been called without an m_dict already set. */ + assert(value->m_dict == NULL); + if (m_dict == NULL) { + return 0; } + assert(PyDict_Check(m_dict)); + assert(value->origin != _Py_ext_module_origin_CORE); + + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!is_interpreter_isolated(interp)); /* XXX gh-88216: The copied dict is owned by the current * interpreter. That's a problem if the interpreter has @@ -1034,28 +1087,34 @@ set_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) * Then we'd need to make sure to clear that when the * runtime is finalized, rather than in * PyImport_ClearModulesByIndex(). */ - /* Ideally, the cached dict would always be NULL at this point. */ - assert(value->def->m_base.m_copy == NULL || copied != NULL); - assert(value->m_dict == NULL || copied != NULL); - Py_XDECREF(value->def->m_base.m_copy); - Py_XDECREF((PyObject *)value->m_dict); + PyObject *copied = PyDict_Copy(m_dict); + if (copied == NULL) { + /* We expect this can only be "out of memory". */ + return -1; + } + // XXX We may want to make the copy immortal. + // XXX This incref shouldn't be necessary. We are decref'ing + // one to many times somewhere. + Py_INCREF(copied); - value->def->m_base.m_copy = Py_XNewRef(copied); - value->m_dict = (cached_m_dict_t)copied; - value->m_dict_interpid = interpid; + value->_m_dict = (struct cached_m_dict){ + .copied=copied, + .interpid=PyInterpreterState_GetID(interp), + }; + + value->m_dict = &value->_m_dict; return 0; } static void -del_cached_m_dict(struct extensions_cache_value *value, - struct extensions_cache_value *compare) -{ - if (compare == NULL && value->def != NULL) { - Py_XDECREF(value->def->m_base.m_copy); - value->def->m_base.m_copy = NULL; - } - if (compare == NULL || compare->m_dict != value->m_dict) { - Py_XDECREF((PyObject *)value->m_dict); +del_cached_m_dict(struct extensions_cache_value *value) +{ + if (value->m_dict != NULL) { + assert(value->m_dict == &value->_m_dict); + assert(value->m_dict->copied != NULL); + /* In the future we can take advantage of m_dict->interpid + * to decref the dict using the owning interpreter. */ + Py_XDECREF(value->m_dict->copied); value->m_dict = NULL; } } @@ -1081,23 +1140,12 @@ get_cached_m_dict(struct extensions_cache_value *value, return m_dict; } -static void -clear_extensions_cache_value(struct extensions_cache_value *value) -{ - assert(value != NULL); - /* If we hadn't made the stored defs immortal, we would decref here. - However, this decref would be problematic if the module def were - dynamically allocated, it were the last ref, and this function - were called with an interpreter other than the def's owner. */ - assert(value->def == NULL || _Py_IsImmortal(value->def)); - del_cached_m_dict(value, NULL); -} - static void del_extensions_cache_value(struct extensions_cache_value *value) { if (value != NULL) { - clear_extensions_cache_value(value); + del_cached_m_dict(value); + del_cached_def(value); free_extensions_cache_value(value); } } @@ -1226,89 +1274,88 @@ _extensions_cache_set(PyObject *path, PyObject *name, assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - /* We assume that all module defs are statically allocated - and will never be freed. Otherwise, we would incref here. */ - _Py_SetImmortalUntracked((PyObject *)def); - - /* Generate the new cache value data before taking the lock. */ - struct extensions_cache_value updates = { - .def=def, - .m_init=m_init, - .origin=origin, - }; - def->m_base.m_init = m_init; - if (set_cached_m_dict(&updates, m_dict) < 0) { - return -1; - } - - /* Move on to the locked section. */ void *key = NULL; struct extensions_cache_value *value = NULL; struct extensions_cache_value *newvalue = NULL; + PyModuleDef_Base olddefbase = def->m_base; extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { if (_extensions_cache_init() < 0) { - goto error; + goto finally; } } - /* Ensure there's a cached value for the module. */ + /* Create a cached value to populate for the module. */ _Py_hashtable_entry_t *entry = _extensions_cache_find_unlocked(path, name, &key); + value = entry == NULL + ? NULL + : (struct extensions_cache_value *)entry->value; + /* We should never be updating an existing cache value. */ + assert(value == NULL); + if (value != NULL) { + PyErr_Format(PyExc_SystemError, + "extension module %R is already cached", name); + goto finally; + } + newvalue = alloc_extensions_cache_value(); + if (newvalue == NULL) { + goto finally; + } + + /* Populate the new cache value data. */ + *newvalue = (struct extensions_cache_value){ + .def=def, + .m_init=m_init, + /* m_dict is set by set_cached_m_dict(). */ + .origin=origin, + }; + if (init_cached_m_dict(newvalue, m_dict) < 0) { + goto finally; + } + fixup_cached_def(newvalue); + if (entry == NULL) { /* It was never added. */ - newvalue = alloc_extensions_cache_value(); - if (newvalue == NULL) { - goto error; - } if (_Py_hashtable_set(EXTENSIONS.hashtable, key, newvalue) < 0) { PyErr_NoMemory(); - goto error; + goto finally; } - value = newvalue; /* The hashtable owns the key now. */ key = NULL; } + else if (value == NULL) { + /* It was previously deleted. */ + entry->value = newvalue; + } else { - value = (struct extensions_cache_value *)entry->value; - if (value == NULL) { - /* It was previously deleted. */ - newvalue = alloc_extensions_cache_value(); - if (newvalue == NULL) { - goto error; - } - value = newvalue; - entry->value = value; - } - else { - /* We expect it to be static, so it must be the same pointer. */ - assert(value->def == def); - /* We expect the same symbol to be used and the shared object file - * to have remained loaded, so it must be the same pointer. */ - assert(value->m_init == m_init); - assert(value->m_dict == NULL || m_dict != NULL); - assert((value->m_dict == NULL) == (value->m_dict_interpid < 0)); - } + /* We are updating the entry for an existing module. */ + /* We expect def to be static, so it must be the same pointer. */ + assert(value->def == def); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(value->m_init == m_init); + /* The same module can't switch between caching __dict__ and not. */ + assert((value->m_dict == NULL) == (m_dict == NULL)); + /* This shouldn't ever happen. */ + Py_UNREACHABLE(); } - /* Fill the cache value with the data we generated earlier. */ - // XXX Skipping this (or an extra incref in set_cached_m_dict() - // should not be necessary. - //del_cached_m_dict(value, updates); - *value = updates; - res = 0; - goto finally; -error: - if (newvalue != NULL) { - del_extensions_cache_value(newvalue); +finally: + if (res < 0) { + restore_old_cached_def(def, &olddefbase); + if (newvalue != NULL) { + del_extensions_cache_value(newvalue); + } + } + else { + cleanup_old_cached_def(&olddefbase); } - clear_extensions_cache_value(&updates); -finally: extensions_lock_release(); if (key != NULL) { hashtable_destroy_str(key); @@ -1875,15 +1922,21 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, assert(def->m_size == -1); assert(def->m_base.m_copy == NULL); - struct singlephase_global_update singlephase = { - /* We don't want def->m_base.m_copy populated. */ - .m_dict=NULL, - .origin=_Py_ext_module_origin_CORE, - }; - if (update_global_state_for_extension( - tstate, nameobj, nameobj, def, &singlephase) < 0) - { - goto finally; + /* We aren't using import_find_extension() for core modules, + * so we have to do the extra check to make sure the module + * isn't already in the global cache before calling + * update_global_state_for_extension(). */ + if (_extensions_cache_get(nameobj, nameobj) == NULL) { + struct singlephase_global_update singlephase = { + /* We don't want def->m_base.m_copy populated. */ + .m_dict=NULL, + .origin=_Py_ext_module_origin_CORE, + }; + if (update_global_state_for_extension( + tstate, nameobj, nameobj, def, &singlephase) < 0) + { + goto finally; + } } if (finish_singlephase_extension(tstate, mod, def, nameobj, modules) < 0) { @@ -1938,6 +1991,17 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) goto finally; } + /* If the module was added to the global cache + * but def->m_base.m_copy was cleared (e.g. subinterp fini) + * then we have to do a little dance here. */ + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); + if (cached != NULL) { + assert(cached->def->m_base.m_copy == NULL); + /* For now we clear the cache and move on. */ + _extensions_cache_delete(info.path, info.name); + } + struct _inittab *found = NULL; for (struct _inittab *p = INITTAB; p->name != NULL; p++) { if (_PyUnicode_EqualToASCIIString(info.name, p->name)) { @@ -4299,6 +4363,17 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) } /* Otherwise it must be multi-phase init or the first time it's loaded. */ + /* If the module was added to the global cache + * but def->m_base.m_copy was cleared (e.g. subinterp fini) + * then we have to do a little dance here. */ + struct extensions_cache_value *cached + = _extensions_cache_get(info.path, info.name); + if (cached != NULL) { + assert(cached->def->m_base.m_copy == NULL); + /* For now we clear the cache and move on. */ + _extensions_cache_delete(info.path, info.name); + } + if (PySys_Audit("import", "OOOOO", info.name, info.filename, Py_None, Py_None, Py_None) < 0) { From e42e797a7daf572a0a8edd8d77230d6efca5290e Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 17:15:04 -0600 Subject: [PATCH 20/23] Add the per-interpreter cache index to each global cache entry. --- Python/import.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Python/import.c b/Python/import.c index c56e85da3465ad..c609f85924f4b0 100644 --- a/Python/import.c +++ b/Python/import.c @@ -971,6 +971,11 @@ struct extensions_cache_value { It is set by update_global_state_for_extension(). */ PyModInitFunction m_init; + /* The module's index into its interpreter's modules_by_index cache. + This is set for all extension modules but only used for legacy ones. + (See PyInterpreterState.modules_by_index for more info.) */ + Py_ssize_t m_index; + /* A copy of the module's __dict__ after the first time it was loaded. This is only set/used for legacy modules that do not support multiple initializations. @@ -1261,7 +1266,8 @@ _extensions_cache_get(PyObject *path, PyObject *name) static int _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def, PyModInitFunction m_init, - PyObject *m_dict, _Py_ext_module_origin origin) + Py_ssize_t m_index, PyObject *m_dict, + _Py_ext_module_origin origin) { int res = -1; assert(def != NULL); @@ -1309,6 +1315,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, *newvalue = (struct extensions_cache_value){ .def=def, .m_init=m_init, + .m_index=m_index, /* m_dict is set by set_cached_m_dict(). */ .origin=origin, }; @@ -1531,8 +1538,9 @@ _get_extension_kind(PyModuleDef *def, bool check_size) struct singlephase_global_update { - PyObject *m_dict; PyModInitFunction m_init; + Py_ssize_t m_index; + PyObject *m_dict; _Py_ext_module_origin origin; }; @@ -1591,7 +1599,8 @@ update_global_state_for_extension(PyThreadState *tstate, assert(cached == NULL || cached->def == def); #endif if (_extensions_cache_set( - path, name, def, m_init, m_dict, singlephase->origin) < 0) + path, name, def, m_init, singlephase->m_index, m_dict, + singlephase->origin) < 0) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. @@ -1817,6 +1826,10 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, /* Update global import state. */ struct singlephase_global_update singlephase = { + // XXX Modules that share a def should each get their own index, + // whereas currently they share (which means the per-interpreter + // cache is less reliable than it should be). + .m_index=def->m_base.m_index, .origin=info->origin, }; // gh-88216: Extensions and def->m_base.m_copy can be updated @@ -1921,6 +1934,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, assert_singlephase_def(def); assert(def->m_size == -1); assert(def->m_base.m_copy == NULL); + assert(def->m_base.m_index >= 0); /* We aren't using import_find_extension() for core modules, * so we have to do the extra check to make sure the module @@ -1928,6 +1942,7 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, * update_global_state_for_extension(). */ if (_extensions_cache_get(nameobj, nameobj) == NULL) { struct singlephase_global_update singlephase = { + .m_index=def->m_base.m_index, /* We don't want def->m_base.m_copy populated. */ .m_dict=NULL, .origin=_Py_ext_module_origin_CORE, From faa425426260192fd337e077664868920b6dc788 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Sat, 4 May 2024 12:53:35 -0600 Subject: [PATCH 21/23] Fix an assert. --- Python/import.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/Python/import.c b/Python/import.c index c609f85924f4b0..a0920ba2d5d66c 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1740,7 +1740,8 @@ reload_singlephase_extension(PyThreadState *tstate, static PyObject * import_find_extension(PyThreadState *tstate, - struct _Py_ext_module_loader_info *info) + struct _Py_ext_module_loader_info *info, + struct extensions_cache_value **p_cached) { /* Only single-phase init modules will be in the cache. */ struct extensions_cache_value *cached @@ -1750,6 +1751,7 @@ import_find_extension(PyThreadState *tstate, } assert(cached->def != NULL); assert_singlephase(cached); + *p_cached = cached; /* It may have been successfully imported previously in an interpreter that allows legacy modules @@ -1770,6 +1772,7 @@ import_find_extension(PyThreadState *tstate, PySys_FormatStderr("import %U # previously loaded (%R)\n", info->name, info->path); } + return mod; } @@ -1991,14 +1994,14 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) return NULL; } - PyObject *mod = import_find_extension(tstate, &info); + struct extensions_cache_value *cached = NULL; + PyObject *mod = import_find_extension(tstate, &info, &cached); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); -#ifndef NDEBUG - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); -#endif - assert(cached->def == _PyModule_GetDef(mod)); + assert(cached != NULL); + /* The module might not have md_def set in certain reload cases. */ + assert(_PyModule_GetDef(mod) == NULL + || cached->def == _PyModule_GetDef(mod)); assert_singlephase(cached); goto finally; } @@ -2009,8 +2012,6 @@ create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) /* If the module was added to the global cache * but def->m_base.m_copy was cleared (e.g. subinterp fini) * then we have to do a little dance here. */ - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); if (cached != NULL) { assert(cached->def->m_base.m_copy == NULL); /* For now we clear the cache and move on. */ @@ -4361,13 +4362,12 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) return NULL; } - mod = import_find_extension(tstate, &info); + struct extensions_cache_value *cached = NULL; + mod = import_find_extension(tstate, &info, &cached); if (mod != NULL) { assert(!_PyErr_Occurred(tstate)); -#ifndef NDEBUG - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); -#endif + assert(cached != NULL); + /* The module might not have md_def set in certain reload cases. */ assert(_PyModule_GetDef(mod) == NULL || cached->def == _PyModule_GetDef(mod)); assert_singlephase(cached); @@ -4381,8 +4381,6 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) /* If the module was added to the global cache * but def->m_base.m_copy was cleared (e.g. subinterp fini) * then we have to do a little dance here. */ - struct extensions_cache_value *cached - = _extensions_cache_get(info.path, info.name); if (cached != NULL) { assert(cached->def->m_base.m_copy == NULL); /* For now we clear the cache and move on. */ From 5276981d2f61519130a9a954570525b3fb805f7b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 3 May 2024 17:15:04 -0600 Subject: [PATCH 22/23] Properly sync the cached m_index and the def m_index. --- Python/import.c | 186 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 138 insertions(+), 48 deletions(-) diff --git a/Python/import.c b/Python/import.c index a0920ba2d5d66c..e7fd6cec81c0d1 100644 --- a/Python/import.c +++ b/Python/import.c @@ -446,6 +446,45 @@ _PyImport_GetNextModuleIndex(void) return _Py_atomic_add_ssize(&LAST_MODULE_INDEX, 1) + 1; } +#ifndef NDEBUG +struct extensions_cache_value; +struct extensions_cache_value * _find_cached_def(PyModuleDef *); +static Py_ssize_t _get_cached_module_index(struct extensions_cache_value *); +#endif + +static Py_ssize_t +_get_module_index_from_def(PyModuleDef *def) +{ + Py_ssize_t index = def->m_base.m_index; + assert(index > 0); +#ifndef NDEBUG + struct extensions_cache_value *cached = _find_cached_def(def); + assert(cached == NULL || index == _get_cached_module_index(cached)); +#endif + return index; +} + +static void +_set_module_index(PyModuleDef *def, Py_ssize_t index) +{ + assert(index > 0); + if (index == def->m_base.m_index) { + /* There's nothing to do. */ + } + else if (def->m_base.m_index == 0) { + /* It should have been initialized by PyModuleDef_Init(). + * We assert here to catch this in dev, but keep going otherwise. */ + assert(def->m_base.m_index != 0); + def->m_base.m_index = index; + } + else { + /* It was already set for a different module. + * We replace the old value. */ + assert(def->m_base.m_index > 0); + def->m_base.m_index = index; + } +} + static const char * _modules_by_index_check(PyInterpreterState *interp, Py_ssize_t index) { @@ -462,9 +501,8 @@ _modules_by_index_check(PyInterpreterState *interp, Py_ssize_t index) } static PyObject * -_modules_by_index_get(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_get(PyInterpreterState *interp, Py_ssize_t index) { - Py_ssize_t index = def->m_base.m_index; if (_modules_by_index_check(interp, index) != NULL) { return NULL; } @@ -474,11 +512,9 @@ _modules_by_index_get(PyInterpreterState *interp, PyModuleDef *def) static int _modules_by_index_set(PyInterpreterState *interp, - PyModuleDef *def, PyObject *module) + Py_ssize_t index, PyObject *module) { - assert(def != NULL); - assert(def->m_slots == NULL); - assert(def->m_base.m_index > 0); + assert(index > 0); if (MODULES_BY_INDEX(interp) == NULL) { MODULES_BY_INDEX(interp) = PyList_New(0); @@ -487,7 +523,6 @@ _modules_by_index_set(PyInterpreterState *interp, } } - Py_ssize_t index = def->m_base.m_index; while (PyList_GET_SIZE(MODULES_BY_INDEX(interp)) <= index) { if (PyList_Append(MODULES_BY_INDEX(interp), Py_None) < 0) { return -1; @@ -498,9 +533,8 @@ _modules_by_index_set(PyInterpreterState *interp, } static int -_modules_by_index_clear_one(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_clear_one(PyInterpreterState *interp, Py_ssize_t index) { - Py_ssize_t index = def->m_base.m_index; const char *err = _modules_by_index_check(interp, index); if (err != NULL) { Py_FatalError(err); @@ -517,7 +551,8 @@ PyState_FindModule(PyModuleDef* module) if (module->m_slots) { return NULL; } - return _modules_by_index_get(interp, module); + Py_ssize_t index = _get_module_index_from_def(module); + return _modules_by_index_get(interp, index); } /* _PyState_AddModule() has been completely removed from the C-API @@ -537,7 +572,9 @@ _PyState_AddModule(PyThreadState *tstate, PyObject* module, PyModuleDef* def) "PyState_AddModule called on module with slots"); return -1; } - return _modules_by_index_set(tstate->interp, def, module); + assert(def->m_slots == NULL); + Py_ssize_t index = _get_module_index_from_def(def); + return _modules_by_index_set(tstate->interp, index, module); } int @@ -557,7 +594,7 @@ PyState_AddModule(PyObject* module, PyModuleDef* def) } PyInterpreterState *interp = tstate->interp; - Py_ssize_t index = def->m_base.m_index; + Py_ssize_t index = _get_module_index_from_def(def); if (MODULES_BY_INDEX(interp) && index < PyList_GET_SIZE(MODULES_BY_INDEX(interp)) && module == PyList_GET_ITEM(MODULES_BY_INDEX(interp), index)) @@ -566,7 +603,8 @@ PyState_AddModule(PyObject* module, PyModuleDef* def) return -1; } - return _modules_by_index_set(interp, def, module); + assert(def->m_slots == NULL); + return _modules_by_index_set(interp, index, module); } int @@ -579,7 +617,8 @@ PyState_RemoveModule(PyModuleDef* def) "PyState_RemoveModule called on module with slots"); return -1; } - return _modules_by_index_clear_one(tstate->interp, def); + Py_ssize_t index = _get_module_index_from_def(def); + return _modules_by_index_clear_one(tstate->interp, index); } @@ -1005,6 +1044,13 @@ free_extensions_cache_value(struct extensions_cache_value *value) PyMem_RawFree(value); } +static Py_ssize_t +_get_cached_module_index(struct extensions_cache_value *cached) +{ + assert(cached->m_index > 0); + return cached->m_index; +} + static void fixup_cached_def(struct extensions_cache_value *value) { @@ -1026,6 +1072,9 @@ fixup_cached_def(struct extensions_cache_value *value) def->m_base.m_init = value->m_init; + assert(value->m_index > 0); + _set_module_index(def, value->m_index); + /* Different modules can share the same def, so we can't just * expect m_copy to be NULL. */ assert(def->m_base.m_copy == NULL @@ -1200,6 +1249,41 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } +#ifndef NDEBUG +struct hashtable_next_match_def_data { + PyModuleDef *def; + struct extensions_cache_value *matched; +}; + +static int +hashtable_next_match_def(_Py_hashtable_t *ht, + const void *key, const void *value, void *user_data) +{ + if (value == NULL) { + /* It was previously deleted. */ + return 0; + } + struct hashtable_next_match_def_data *data + = (struct hashtable_next_match_def_data *)user_data; + struct extensions_cache_value *cur + = (struct extensions_cache_value *)value; + if (cur->def == data->def) { + data->matched = cur; + return 1; + } + return 0; +} + +struct extensions_cache_value * +_find_cached_def(PyModuleDef *def) +{ + struct hashtable_next_match_def_data data = {0}; + (void)_Py_hashtable_foreach( + EXTENSIONS.hashtable, hashtable_next_match_def, &data); + return data.matched; +} +#endif + #define HTSEP ':' static int @@ -1263,13 +1347,17 @@ _extensions_cache_get(PyObject *path, PyObject *name) } /* This can only fail with "out of memory". */ -static int +static struct extensions_cache_value * _extensions_cache_set(PyObject *path, PyObject *name, PyModuleDef *def, PyModInitFunction m_init, Py_ssize_t m_index, PyObject *m_dict, _Py_ext_module_origin origin) { - int res = -1; + struct extensions_cache_value *value = NULL; + void *key = NULL; + struct extensions_cache_value *newvalue = NULL; + PyModuleDef_Base olddefbase = def->m_base; + assert(def != NULL); assert(m_init == NULL || m_dict == NULL); /* We expect the same symbol to be used and the shared object file @@ -1280,11 +1368,6 @@ _extensions_cache_set(PyObject *path, PyObject *name, assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); - void *key = NULL; - struct extensions_cache_value *value = NULL; - struct extensions_cache_value *newvalue = NULL; - PyModuleDef_Base olddefbase = def->m_base; - extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1350,10 +1433,10 @@ _extensions_cache_set(PyObject *path, PyObject *name, Py_UNREACHABLE(); } - res = 0; + value = newvalue; finally: - if (res < 0) { + if (value == NULL) { restore_old_cached_def(def, &olddefbase); if (newvalue != NULL) { del_extensions_cache_value(newvalue); @@ -1368,7 +1451,7 @@ _extensions_cache_set(PyObject *path, PyObject *name, hashtable_destroy_str(key); } - return res; + return value; } static void @@ -1544,12 +1627,13 @@ struct singlephase_global_update { _Py_ext_module_origin origin; }; -static int +static struct extensions_cache_value * update_global_state_for_extension(PyThreadState *tstate, PyObject *path, PyObject *name, PyModuleDef *def, struct singlephase_global_update *singlephase) { + struct extensions_cache_value *cached = NULL; PyModInitFunction m_init = NULL; PyObject *m_dict = NULL; @@ -1594,34 +1678,34 @@ update_global_state_for_extension(PyThreadState *tstate, // XXX Why special-case the main interpreter? if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { #ifndef NDEBUG - struct extensions_cache_value *cached - = _extensions_cache_get(path, name); + cached = _extensions_cache_get(path, name); assert(cached == NULL || cached->def == def); #endif - if (_extensions_cache_set( + cached = _extensions_cache_set( path, name, def, m_init, singlephase->m_index, m_dict, - singlephase->origin) < 0) - { + singlephase->origin); + if (cached == NULL) { // XXX Ignore this error? Doing so would effectively // mark the module as not loadable. - return -1; + return NULL; } } - return 0; + return cached; } /* For multi-phase init modules, the module is finished * by PyModule_FromDefAndSpec(). */ static int -finish_singlephase_extension(PyThreadState *tstate, - PyObject *mod, PyModuleDef *def, +finish_singlephase_extension(PyThreadState *tstate, PyObject *mod, + struct extensions_cache_value *cached, PyObject *name, PyObject *modules) { assert(mod != NULL && PyModule_Check(mod)); - assert(def == _PyModule_GetDef(mod)); + assert(cached->def == _PyModule_GetDef(mod)); - if (_modules_by_index_set(tstate->interp, def, mod) < 0) { + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_set(tstate->interp, index, mod) < 0) { return -1; } @@ -1729,7 +1813,8 @@ reload_singlephase_extension(PyThreadState *tstate, } } - if (_modules_by_index_set(tstate->interp, def, mod) < 0) { + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_set(tstate->interp, index, mod) < 0) { PyMapping_DelItem(modules, info->name); Py_DECREF(mod); return NULL; @@ -1786,6 +1871,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, PyObject *mod = NULL; PyModuleDef *def = NULL; + struct extensions_cache_value *cached = NULL; const char *name_buf = PyBytes_AS_STRING(info->name_encoded); struct _Py_ext_module_loader_result res; @@ -1828,6 +1914,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, } /* Update global import state. */ + assert(def->m_base.m_index != 0); struct singlephase_global_update singlephase = { // XXX Modules that share a def should each get their own index, // whereas currently they share (which means the per-interpreter @@ -1849,16 +1936,16 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, assert(def->m_base.m_copy == NULL); singlephase.m_init = p0; } - if (update_global_state_for_extension( - tstate, info->path, info->name, def, &singlephase) < 0) - { + cached = update_global_state_for_extension( + tstate, info->path, info->name, def, &singlephase); + if (cached == NULL) { goto error; } /* Update per-interpreter import state. */ PyObject *modules = get_modules_dict(tstate, true); if (finish_singlephase_extension( - tstate, mod, def, info->name, modules) < 0) + tstate, mod, cached, info->name, modules) < 0) { goto error; } @@ -1893,8 +1980,9 @@ clear_singlephase_extension(PyInterpreterState *interp, // We leave m_index alone since there's no reason to reset it. /* Clear the PyState_*Module() cache entry. */ - if (_modules_by_index_check(interp, def->m_base.m_index) == NULL) { - if (_modules_by_index_clear_one(interp, def) < 0) { + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_check(interp, index) == NULL) { + if (_modules_by_index_clear_one(interp, index) < 0) { return -1; } } @@ -1943,21 +2031,23 @@ _PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, * so we have to do the extra check to make sure the module * isn't already in the global cache before calling * update_global_state_for_extension(). */ - if (_extensions_cache_get(nameobj, nameobj) == NULL) { + struct extensions_cache_value *cached + = _extensions_cache_get(nameobj, nameobj); + if (cached == NULL) { struct singlephase_global_update singlephase = { .m_index=def->m_base.m_index, /* We don't want def->m_base.m_copy populated. */ .m_dict=NULL, .origin=_Py_ext_module_origin_CORE, }; - if (update_global_state_for_extension( - tstate, nameobj, nameobj, def, &singlephase) < 0) - { + cached = update_global_state_for_extension( + tstate, nameobj, nameobj, def, &singlephase); + if (cached == NULL) { goto finally; } } - if (finish_singlephase_extension(tstate, mod, def, nameobj, modules) < 0) { + if (finish_singlephase_extension(tstate, mod, cached, nameobj, modules) < 0) { goto finally; } From 7d3dbc18e643d060a3b89d939767b73a5ff4a436 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Sat, 4 May 2024 15:00:35 -0600 Subject: [PATCH 23/23] Fix make smelly. --- Python/import.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/import.c b/Python/import.c index b484e2d28d2625..fa0e548c9bf4c4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -448,7 +448,7 @@ _PyImport_GetNextModuleIndex(void) #ifndef NDEBUG struct extensions_cache_value; -struct extensions_cache_value * _find_cached_def(PyModuleDef *); +static struct extensions_cache_value * _find_cached_def(PyModuleDef *); static Py_ssize_t _get_cached_module_index(struct extensions_cache_value *); #endif @@ -1274,7 +1274,7 @@ hashtable_next_match_def(_Py_hashtable_t *ht, return 0; } -struct extensions_cache_value * +static struct extensions_cache_value * _find_cached_def(PyModuleDef *def) { struct hashtable_next_match_def_data data = {0}; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy