diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 0e3d46852f2e6d..5a54608ae3db81 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -18,6 +18,7 @@ extern "C" { #include "pycore_dict_state.h" // struct _Py_dict_state #include "pycore_exceptions.h" // struct _Py_exc_state #include "pycore_floatobject.h" // struct _Py_float_state +#include "pycore_pymem.h" // free lists #include "pycore_function.h" // FUNC_MAX_WATCHERS #include "pycore_genobject.h" // struct _Py_async_gen_state #include "pycore_gc.h" // struct _gc_runtime_state @@ -50,6 +51,9 @@ struct _Py_long_state { /* interpreter state */ +#define SMALL_OBJECT_FREELIST_SIZE 1024 +#define INTERP_NUM_FREELISTS 30 + /* PyInterpreterState holds the global state for one of the runtime's interpreters. Typically the initial (main) interpreter is the only one. @@ -178,6 +182,7 @@ struct _is { struct _Py_context_state context; struct _Py_exc_state exc_state; + _PyFreeList freelists[INTERP_NUM_FREELISTS]; struct ast_state ast; struct types_state types; struct callable_cache callable_cache; @@ -230,6 +235,36 @@ PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); +#if SIZEOF_VOID_P == 4 +#define LOG_BASE_2_OF_FREELIST_QUANTUM 3 +#elif SIZEOF_VOID_P == 8 +#define LOG_BASE_2_OF_FREELIST_QUANTUM 4 +#else +#error "void pointer size not in (32, 64)" +#endif + +#define FREELIST_QUANTUM (2*SIZEOF_VOID_P) +#define SIZE_TO_FREELIST_SIZE_CLASS(size) (((size) + FREELIST_QUANTUM - 1) >> \ + LOG_BASE_2_OF_FREELIST_QUANTUM) +#define FREELIST_INDEX_TO_ALLOCATED_SIZE(idx) ((idx) * FREELIST_QUANTUM) + +static inline PyObject* +_PyInterpreterState_FreelistAlloc(PyInterpreterState *interp, Py_ssize_t size) { + Py_ssize_t index = SIZE_TO_FREELIST_SIZE_CLASS(size); + assert(index >= 0 && index < INTERP_NUM_FREELISTS); + 
return _PyFreeList_Alloc(&interp->freelists[index]); +} + +static inline void +_PyInterpreterState_FreelistFree(PyInterpreterState * interp, PyObject *op, Py_ssize_t size) { + /* todo: assert the size is correct? */ + Py_ssize_t index = SIZE_TO_FREELIST_SIZE_CLASS(size); + assert(index >= 0 && index < INTERP_NUM_FREELISTS); + _PyFreeList_Free(&interp->freelists[index], op); +} + + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 8c1d017bb95e4e..9a93184cc0c28a 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -129,6 +129,8 @@ _PyLong_IsPositiveSingleDigit(PyObject* sub) { return ((size_t)signed_size) <= 1; } +void _PyLong_Free(PyLongObject *op); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 4cc953d8d779c9..86e0ea1c5e1a02 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -91,6 +91,80 @@ PyAPI_FUNC(int) _PyMem_GetAllocatorName( PyAPI_FUNC(int) _PyMem_SetupAllocators(PyMemAllocatorName allocator); +/* Free lists. + * + * Free lists have a pointer to their first entry and + * the amount of space available allowing fast checks + * for emptiness and fullness. + * When empty they are half filled and when full they are + * completely emptied. This helps the underlying allocator + * avoid fragmentation and helps performance. 
+ */ + +typedef struct _freelist { + void *ptr; + uint32_t space; + uint16_t size; + uint16_t capacity; +#ifdef Py_STATS + int size_class; +#endif +} _PyFreeList; + +extern void *_PyFreeList_HalfFillAndAllocate(_PyFreeList *list); +extern void _PyFreeList_FreeToFull(_PyFreeList *list, void *ptr); +extern void _PyFreeList_Clear(_PyFreeList *list); +extern void _PyFreeList_Disable(_PyFreeList *list); + +static inline void * +_PyFreeList_Alloc(_PyFreeList *list) { +#ifdef Py_STATS + if (_py_stats) _py_stats->freelist_stats[list->size_class].allocations++; +#endif + if (list->ptr != NULL) { + void *result = list->ptr; + list->ptr = *((void **)result); + list->space++; + return result; + } +#ifdef Py_STATS + if (_py_stats) _py_stats->freelist_stats[list->size_class].empty++; +#endif + return _PyFreeList_HalfFillAndAllocate(list); +} + +static inline void +_PyFreeList_Free(_PyFreeList *list, void *ptr) { +#ifdef Py_STATS + if (_py_stats) _py_stats->freelist_stats[list->size_class].frees++; +#endif + if (list->space) { + *((void **)ptr) = list->ptr; + list->ptr = ptr; + list->space--; + return; + } +#ifdef Py_STATS + if (_py_stats) _py_stats->freelist_stats[list->size_class].full++; +#endif + _PyFreeList_FreeToFull(list, ptr); +} + +static inline void +_PyFreeList_Init(_PyFreeList *list, int size_class, int size, int capacity) +{ + list->ptr = NULL; + list->size = size; +#ifdef Py_STATS + list->size_class = size_class; +#endif +#if WITH_FREELISTS + list->space = list->capacity = capacity; +#else + _PyFreeList_Disable(list); +#endif +} + #ifdef __cplusplus } #endif diff --git a/Include/pystats.h b/Include/pystats.h index 25ed4bddc7240c..1d14960d31cbdc 100644 --- a/Include/pystats.h +++ b/Include/pystats.h @@ -48,6 +48,13 @@ typedef struct _call_stats { uint64_t eval_calls[EVAL_CALL_KINDS]; } CallStats; +typedef struct _generic_freelist_stats { + uint64_t allocations; + uint64_t frees; + uint64_t empty; + uint64_t full; +} GenericFreelistStats; + typedef struct 
_object_stats { uint64_t increfs; uint64_t decrefs; @@ -78,6 +85,7 @@ typedef struct _stats { OpcodeStats opcode_stats[256]; CallStats call_stats; ObjectStats object_stats; + GenericFreelistStats freelist_stats[30]; // INTERP_NUM_FREELISTS } PyStats; diff --git a/Objects/longobject.c b/Objects/longobject.c index 8293f133bed213..248ca83f2d7c96 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6,6 +6,7 @@ #include "pycore_bitutils.h" // _Py_popcount32() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_long.h" // _Py_SmallInts +#include "pycore_pymem.h" // Free lists #include "pycore_object.h" // _PyObject_InitVar() #include "pycore_pystate.h" // _Py_IsMainInterpreter() #include "pycore_runtime.h" // _PY_NSMALLPOSINTS @@ -46,7 +47,7 @@ static inline void _Py_DECREF_INT(PyLongObject *op) { assert(PyLong_CheckExact(op)); - _Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)_PyLong_Free); } static inline int @@ -152,16 +153,20 @@ _PyLong_New(Py_ssize_t size) "too many digits in integer"); return NULL; } - /* Fast operations for single digit integers (including zero) - * assume that there is always at least one digit present. */ - Py_ssize_t ndigits = size ? size : 1; - /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + - sizeof(digit)*size. Previous incarnations of this code used - sizeof(PyVarObject) instead of the offsetof, but this risks being - incorrect in the presence of padding between the PyVarObject header - and the digits. */ - result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) + - ndigits*sizeof(digit)); + assert(size >= 0); + if (size <= 1) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + result = (PyLongObject *)_PyInterpreterState_FreelistAlloc(interp, sizeof(PyLongObject)); + } + else { + /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) + + sizeof(digit)*size. 
Previous incarnations of this code used + sizeof(PyVarObject) instead of the offsetof, but this risks being + incorrect in the presence of padding between the PyVarObject header + and the digits. */ + result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) + + size*sizeof(digit)); + } if (!result) { PyErr_NoMemory(); return NULL; @@ -201,11 +206,11 @@ _PyLong_FromMedium(sdigit x) { assert(!IS_SMALL_INT(x)); assert(is_medium_int(x)); - /* We could use a freelist here */ - PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject)); + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyLongObject *v = (PyLongObject *)_PyInterpreterState_FreelistAlloc( + interp, sizeof(PyLongObject)); if (v == NULL) { - PyErr_NoMemory(); - return NULL; + return PyErr_NoMemory(); } Py_ssize_t sign = x < 0 ? -1: 1; digit abs_x = x < 0 ? -x : x; @@ -267,6 +272,19 @@ _PyLong_FromSTwoDigits(stwodigits x) return _PyLong_FromLarge(x); } +void +_PyLong_Free(PyLongObject *op) +{ + if (PyLong_CheckExact(op) && IS_MEDIUM_VALUE(op)) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyInterpreterState_FreelistFree(interp, (PyObject*)op, sizeof(PyLongObject)); + } + else + { + Py_TYPE(op)->tp_free((PyObject *)op); + } +} + int _PyLong_AssignValue(PyObject **target, Py_ssize_t value) { @@ -6286,7 +6304,7 @@ PyTypeObject PyLong_Type = { "int", /* tp_name */ offsetof(PyLongObject, long_value.ob_digit), /* tp_basicsize */ sizeof(digit), /* tp_itemsize */ - 0, /* tp_dealloc */ + (destructor)_PyLong_Free, /* tp_dealloc */ 0, /* tp_vectorcall_offset */ 0, /* tp_getattr */ 0, /* tp_setattr */ diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 276c5a276c06e6..73cbccc980b9a9 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -717,6 +717,64 @@ PyObject_Free(void *ptr) # define LIKELY(value) (value) #endif + +void * +_PyFreeList_HalfFillAndAllocate(_PyFreeList *list) +{ + assert(list->ptr == NULL); + if (list->capacity < 4) { + return PyObject_Malloc(list->size); 
+ } + uint32_t i = 0; + for (; i < list->space>>1; i++) { + void* ptr = PyObject_Malloc(list->size); + if (ptr == NULL) { + break; + } + *((void**)ptr) = list->ptr; + list->ptr = ptr; + } + if (i == 0) { + return NULL; + } + void *result = list->ptr; + list->ptr = *((void **)result); + list->space -= (i-1); + return result; +} + +void +_PyFreeList_Clear(_PyFreeList *list) +{ + int space = 0; + void *head = list->ptr; + while (head) { + void *next = *((void**)head); + PyObject_Free(head); + head = next; + space++; + } + list->ptr = NULL; + list->space += space; +} + +void +_PyFreeList_Disable(_PyFreeList *list) +{ + list->space = list->capacity = 0; +} + +void +_PyFreeList_FreeToFull(_PyFreeList *list, void *ptr) +{ + assert(list->space == 0); + PyObject_Free(ptr); + if (list->ptr == NULL) { + return; + } + _PyFreeList_Clear(list); +} + #ifdef WITH_PYMALLOC #ifdef WITH_VALGRIND diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1169d8d172dd57..da45d0511cbdf8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -183,8 +183,8 @@ dummy_func( DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); ERROR_IF(prod == NULL, error); } @@ -207,8 +207,8 @@ dummy_func( DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); ERROR_IF(sub == NULL, error); } @@ -290,8 +290,8 @@ dummy_func( DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), 
BINARY_OP); STAT_INC(BINARY_OP, hit); sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); ERROR_IF(sum == NULL, error); } @@ -364,7 +364,7 @@ dummy_func( res = PyList_GET_ITEM(list, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); } @@ -382,7 +382,7 @@ dummy_func( res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(tuple); } @@ -478,7 +478,7 @@ dummy_func( PyList_SET_ITEM(list, index, value); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); } @@ -1775,8 +1775,8 @@ dummy_func( Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->long_value.ob_digit[0]; // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); if (sign_ish & oparg) { int offset = _Py_OPARG(next_instr[1]); JUMPBY(offset); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 09eb6893ebf6b4..9ff7525a680370 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -248,8 +248,8 @@ DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - 
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); if (prod == NULL) goto pop_2_error; STACK_SHRINK(1); POKE(1, prod); @@ -286,8 +286,8 @@ DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); if (sub == NULL) goto pop_2_error; STACK_SHRINK(1); POKE(1, sub); @@ -395,8 +395,8 @@ DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); if (sum == NULL) goto pop_2_error; STACK_SHRINK(1); POKE(1, sum); @@ -491,7 +491,7 @@ res = PyList_GET_ITEM(list, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); STACK_SHRINK(1); POKE(1, res); @@ -516,7 +516,7 @@ res = PyTuple_GET_ITEM(tuple, index); assert(res != NULL); Py_INCREF(res); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(tuple); STACK_SHRINK(1); POKE(1, res); @@ -644,7 +644,7 @@ PyList_SET_ITEM(list, index, value); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(sub, (destructor)_PyLong_Free); Py_DECREF(list); 
STACK_SHRINK(3); JUMPBY(1); @@ -2236,8 +2236,8 @@ Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->long_value.ob_digit[0]; // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)_PyLong_Free); + _Py_DECREF_SPECIALIZED(right, (destructor)_PyLong_Free); if (sign_ish & oparg) { int offset = _Py_OPARG(next_instr[1]); JUMPBY(offset); diff --git a/Python/pystate.c b/Python/pystate.c index 1261092d1435fa..d0a52beb35eb68 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -717,6 +717,12 @@ PyInterpreterState_New(void) init_interpreter(interp, runtime, id, old_head, pending_lock); + for (int i=0; i < INTERP_NUM_FREELISTS; i++) { + _PyFreeList_Init(&interp->freelists[i], i, + FREELIST_INDEX_TO_ALLOCATED_SIZE(i), + SMALL_OBJECT_FREELIST_SIZE); + } + HEAD_UNLOCK(runtime); return interp; @@ -759,6 +765,11 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } HEAD_UNLOCK(runtime); + for (int i=0; i < INTERP_NUM_FREELISTS; i++) { + _PyFreeList_Clear(&interp->freelists[i]); + _PyFreeList_Disable(&interp->freelists[i]); + } + /* It is possible that any of the objects below have a finalizer that runs Python code or otherwise relies on a thread state or even the interpreter state. 
For now we trust that isn't diff --git a/Python/specialize.c b/Python/specialize.c index 908ad6dceb57f3..8ec4978b3a886f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -195,11 +195,26 @@ print_object_stats(FILE *out, ObjectStats *stats) fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses); } +static void +print_freelist_stats(FILE *out, GenericFreelistStats freelist_stats[INTERP_NUM_FREELISTS]) +{ + for (int i=0; i < INTERP_NUM_FREELISTS; i++) { + GenericFreelistStats *stats = &freelist_stats[i]; + if (stats->allocations > 0) { + fprintf(out, "Allocations from freelist[%d]: %" PRIu64 "\n", i, stats->allocations); + fprintf(out, "Frees into freelist[%d]: %" PRIu64 "\n", i, stats->frees); + fprintf(out, "Freelist[%d] empty: %" PRIu64 "\n", i, stats->empty); + fprintf(out, "Freelist[%d] full: %" PRIu64 "\n", i, stats->full); + } + } +} + static void print_stats(FILE *out, PyStats *stats) { print_spec_stats(out, stats->opcode_stats); print_call_stats(out, &stats->call_stats); print_object_stats(out, &stats->object_stats); + print_freelist_stats(out, stats->freelist_stats); } void pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy