From 3a2ccdad55233715cc42559d5fe0872f3ae80867 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 2 May 2025 10:14:10 +0100 Subject: [PATCH 1/6] Track the current executor on the thread-state, not the previous one. Batch executors for deallocation to avoid having to constantly incref executors; this is an ad-hoc form of deferred reference counting. --- Include/cpython/pystate.h | 2 +- Include/internal/pycore_interp_structs.h | 2 + Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_optimizer.h | 7 +++ Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 24 +++++---- Python/ceval.c | 47 ++++++++++------- Python/ceval_macros.h | 5 +- Python/executor_cases.c.h | 11 ++-- Python/generated_cases.c.h | 20 ++++++-- Python/optimizer.c | 62 +++++++++++++++++++++-- Python/pystate.c | 8 ++- 12 files changed, 143 insertions(+), 49 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 97c097aa01c508..7f1bc363861ddf 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -194,7 +194,7 @@ struct _ts { /* The thread's exception stack entry. (Always the last entry.) */ _PyErr_StackItem exc_state; - PyObject *previous_executor; + PyObject *current_executor; uint64_t dict_global_version; diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index af6ee3ab48939f..45d878af967b86 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -923,6 +923,8 @@ struct _is { PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; struct _PyExecutorObject *executor_list_head; + struct _PyExecutorObject *executor_deletion_list_head; + int executor_deletion_list_remaining_capacity; size_t trace_run_counter; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 852d6313394918..20943b71640c2e 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1195,7 +1195,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, 0 }, + [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD_JIT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 4af1fa63ac1f1a..e756b2ffeb97ad 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -84,6 +84,10 @@ typedef struct _PyExecutorObject { _PyExitData exits[1]; } _PyExecutorObject; +/* If pending deletion list gets large enough, then scan, + * and free any executors that aren't executing + * i.e. any that aren't a thread's current_executor. */ +#define EXECUTOR_DELETE_LIST_MAX 100 // Export for '_opcode' shared extension (JIT compiler). PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset); @@ -304,6 +308,9 @@ static inline int is_terminator(const _PyUOpInstruction *uop) } PyAPI_FUNC(int) _PyDumpExecutors(FILE *out); +#ifdef _Py_TIER2 +extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp); +#endif #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 03113cd4c963b7..b2dd8242623f5b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -300,7 +300,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, - [_START_EXECUTOR] = HAS_ESCAPES_FLAG, + [_START_EXECUTOR] = 0, [_MAKE_WARM] = 0, [_FATAL_ERROR] = 0, [_DEOPT] = 0, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b6fde3f1029421..2fbf61007cdf6c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1169,6 +1169,12 @@ dummy_func( tstate->current_frame = frame->previous; assert(!_PyErr_Occurred(tstate)); PyObject *result = PyStackRef_AsPyObjectSteal(retval); + _PyStackRef executor = entry.frame.localsplus[0]; + assert(tstate->current_executor == NULL); + if (!PyStackRef_IsNull(executor)) { + tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); + PyStackRef_CLOSE(executor); + } LLTRACE_RESUME_FRAME(); return result; } @@ -2912,8 +2918,8 @@ dummy_func( } else { this_instr[1].counter = initial_jump_backoff_counter(); - assert(tstate->previous_executor == NULL); - tstate->previous_executor = Py_None; + assert(tstate->current_executor == NULL); + tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); } } @@ -2965,7 +2971,7 @@ dummy_func( assert(executor->vm_data.index == INSTR_OFFSET() - 1); assert(executor->vm_data.code == code); assert(executor->vm_data.valid); - assert(tstate->previous_executor == NULL); + assert(tstate->current_executor == NULL); /* If the eval breaker is set then stay in tier 1. * This avoids any potentially infinite loops * involving _RESUME_CHECK */ @@ -2978,8 +2984,7 @@ dummy_func( } DISPATCH_GOTO(); } - tstate->previous_executor = Py_None; - Py_INCREF(executor); + tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -5247,7 +5252,7 @@ dummy_func( exit->temperature = initial_temperature_backoff_counter(); Py_CLEAR(exit->executor); } - tstate->previous_executor = (PyObject *)current_executor; + tstate->current_executor = NULL; if (exit->executor == NULL) { _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { @@ -5270,7 +5275,7 @@ dummy_func( } exit->executor = executor; } - Py_INCREF(exit->executor); + tstate->current_executor = (PyObject *)exit->executor; GOTO_TIER_TWO(exit->executor); } @@ -5309,7 +5314,6 @@ dummy_func( } tier2 op(_START_EXECUTOR, (executor/4 --)) { - Py_CLEAR(tstate->previous_executor); #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif @@ -5330,12 +5334,12 @@ dummy_func( } tier2 op(_DEOPT, (--)) { - tstate->previous_executor = (PyObject *)current_executor; + tstate->current_executor = NULL; GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET()); } tier2 op(_ERROR_POP_N, (target/2 --)) { - tstate->previous_executor = (PyObject *)current_executor; + tstate->current_executor = NULL; assert(oparg == 0); frame->instr_ptr = _PyFrame_GetBytecode(frame) + target; SYNC_SP(); diff --git a/Python/ceval.c b/Python/ceval.c index c777e7944f6c4c..c4f91d5654b9b7 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -990,6 +990,11 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch) #define DONT_SLP_VECTORIZE #endif +typedef struct { + _PyInterpreterFrame frame; + _PyStackRef stack[1]; +} _PyEntryFrame; + PyObject* _Py_HOT_FUNCTION DONT_SLP_VECTORIZE _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { @@ -1009,7 +1014,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int int oparg; /* Current opcode argument, if any */ assert(tstate->current_frame == NULL || tstate->current_frame->stackpointer != NULL); #endif - _PyInterpreterFrame entry_frame; + _PyEntryFrame entry; if (_Py_EnterRecursiveCallTstate(tstate, "")) { assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); @@ -1021,31 +1026,39 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int * These are cached values from the frame and code object. */ _Py_CODEUNIT *next_instr; _PyStackRef *stack_pointer; - entry_frame.localsplus[0] = PyStackRef_NULL; + entry.stack[0] = PyStackRef_NULL; #ifdef Py_STACKREF_DEBUG - entry_frame.f_funcobj = PyStackRef_None; + entry.frame.f_funcobj = PyStackRef_None; #elif defined(Py_DEBUG) /* Set these to invalid but identifiable values for debugging. */ - entry_frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0}; - entry_frame.f_locals = (PyObject*)0xaaa1; - entry_frame.frame_obj = (PyFrameObject*)0xaaa2; - entry_frame.f_globals = (PyObject*)0xaaa3; - entry_frame.f_builtins = (PyObject*)0xaaa4; + entry.frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0}; + entry.frame.f_locals = (PyObject*)0xaaa1; + entry.frame.frame_obj = (PyFrameObject*)0xaaa2; + entry.frame.f_globals = (PyObject*)0xaaa3; + entry.frame.f_builtins = (PyObject*)0xaaa4; #endif - entry_frame.f_executable = PyStackRef_None; - entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; - entry_frame.stackpointer = entry_frame.localsplus; - entry_frame.owner = FRAME_OWNED_BY_INTERPRETER; - entry_frame.visited = 0; - entry_frame.return_offset = 0; + entry.frame.f_executable = PyStackRef_None; + entry.frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; + entry.frame.stackpointer = entry.stack; + entry.frame.owner = FRAME_OWNED_BY_INTERPRETER; + entry.frame.visited = 0; + entry.frame.return_offset = 0; #ifdef Py_DEBUG - entry_frame.lltrace = 0; + entry.frame.lltrace = 0; #endif /* Push frame */ - entry_frame.previous = tstate->current_frame; - frame->previous = &entry_frame; + entry.frame.previous = tstate->current_frame; + frame->previous = &entry.frame; tstate->current_frame = frame; + if (tstate->current_executor != NULL) { + entry.frame.localsplus[0] = PyStackRef_FromPyObjectNew(tstate->current_executor); + tstate->current_executor = NULL; + } + else { + entry.frame.localsplus[0] = PyStackRef_NULL; + } + /* support for generator.throw() */ if (throwflag) { if (_Py_EnterRecursivePy(tstate)) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index e1d2673848cc69..e3852b7b55850e 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -359,12 +359,13 @@ _PyFrame_SetStackPointer(frame, stack_pointer) do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ + assert(tstate->current_executor == _executor); \ jit_func jitted = _executor->jit_code; \ /* Keep the shim frame alive via the executor: */ \ Py_INCREF(_executor); \ next_instr = jitted(frame, stack_pointer, tstate); \ Py_DECREF(_executor); \ - Py_CLEAR(tstate->previous_executor); \ + tstate->current_executor = NULL; \ frame = tstate->current_frame; \ stack_pointer = _PyFrame_GetStackPointer(frame); \ if (next_instr == NULL) { \ @@ -387,9 +388,9 @@ do { \ do \ { \ next_instr = (TARGET); \ + assert(tstate->current_executor == NULL); \ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \ _PyFrame_SetStackPointer(frame, stack_pointer); \ - Py_CLEAR(tstate->previous_executor); \ stack_pointer = _PyFrame_GetStackPointer(frame); \ if (next_instr == NULL) \ { \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 040be54f9b87ff..e91867016d1d17 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -6965,7 +6965,7 @@ Py_CLEAR(exit->executor); stack_pointer = _PyFrame_GetStackPointer(frame); } - tstate->previous_executor = (PyObject *)current_executor; + tstate->current_executor = NULL; if (exit->executor == NULL) { _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { @@ -6990,7 +6990,7 @@ } exit->executor = executor; } - Py_INCREF(exit->executor); + tstate->current_executor = (PyObject *)exit->executor; GOTO_TIER_TWO(exit->executor); break; } @@ -7094,9 +7094,6 @@ case _START_EXECUTOR: { PyObject *executor = (PyObject *)CURRENT_OPERAND0(); - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_CLEAR(tstate->previous_executor); - stack_pointer = _PyFrame_GetStackPointer(frame); #ifndef _Py_JIT current_executor = (_PyExecutorObject*)executor; #endif @@ -7119,7 +7116,7 @@ } case _DEOPT: { - tstate->previous_executor = (PyObject *)current_executor; + tstate->current_executor = NULL; GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET()); break; } @@ -7127,7 +7124,7 @@ case _ERROR_POP_N: { oparg = CURRENT_OPARG(); uint32_t target = (uint32_t)CURRENT_OPERAND0(); - tstate->previous_executor = (PyObject *)current_executor; + tstate->current_executor = NULL; assert(oparg == 0); frame->instr_ptr = _PyFrame_GetBytecode(frame) + target; GOTO_TIER_ONE(NULL); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 004f4db593dc10..1a217d44f3e56f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5535,7 +5535,7 @@ assert(executor->vm_data.index == INSTR_OFFSET() - 1); assert(executor->vm_data.code == code); assert(executor->vm_data.valid); - assert(tstate->previous_executor == NULL); + assert(tstate->current_executor == NULL); if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; @@ -5545,8 +5545,7 @@ } DISPATCH_GOTO(); } - tstate->previous_executor = Py_None; - Py_INCREF(executor); + tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -7630,6 +7629,17 @@ tstate->current_frame = frame->previous; assert(!_PyErr_Occurred(tstate)); PyObject *result = PyStackRef_AsPyObjectSteal(retval); + _PyStackRef executor = entry.frame.localsplus[0]; + assert(tstate->current_executor == NULL); + if (!PyStackRef_IsNull(executor)) { + tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(executor); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += 1; + } LLTRACE_RESUME_FRAME(); return result; } @@ -7764,8 +7774,8 @@ _PyFrame_SetStackPointer(frame, stack_pointer); this_instr[1].counter = initial_jump_backoff_counter(); stack_pointer = _PyFrame_GetStackPointer(frame); - assert(tstate->previous_executor == NULL); - tstate->previous_executor = Py_None; + assert(tstate->current_executor == NULL); + tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); } } diff --git a/Python/optimizer.c b/Python/optimizer.c index f8d0aa04b9e003..2ca3898bf85d7a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -204,16 +204,70 @@ get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored)) static int executor_clear(PyObject *executor); static void unlink_executor(_PyExecutorObject *executor); + +static void +free_executor(_PyExecutorObject *self) +{ +#ifdef _Py_JIT + _PyJIT_Free(self); +#endif + PyObject_GC_Del(self); +} + +void +_Py_ClearExecutorDeletionList(PyInterpreterState *interp) +{ + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyThreadState* ts = PyInterpreterState_ThreadHead(interp); + HEAD_UNLOCK(runtime); + while (ts) { + _PyExecutorObject *current = (_PyExecutorObject *)ts->current_executor; + if (current != NULL) { + current->vm_data.linked = 1; + } + HEAD_LOCK(runtime); + ts = PyThreadState_Next(ts); + HEAD_UNLOCK(runtime); + } + _PyExecutorObject **prev_to_next_ptr = &interp->executor_deletion_list_head; + _PyExecutorObject *exec = *prev_to_next_ptr; + while (exec != NULL) { + if (exec->vm_data.linked) { + // This executor is currently executing + exec->vm_data.linked = 0; + prev_to_next_ptr = &exec->vm_data.links.next; + } + else { + *prev_to_next_ptr = exec->vm_data.links.next; + free_executor(exec); + } + exec = *prev_to_next_ptr; + } + interp->executor_deletion_list_remaining_capacity = EXECUTOR_DELETE_LIST_MAX; +} + +static void +add_to_pending_deletion_list(_PyExecutorObject *self) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + self->vm_data.links.next = interp->executor_deletion_list_head; + interp->executor_deletion_list_head = self; + if (interp->executor_deletion_list_remaining_capacity > 0) { + interp->executor_deletion_list_remaining_capacity--; + } + else { + _Py_ClearExecutorDeletionList(interp); + } +} + static void uop_dealloc(PyObject *op) { _PyExecutorObject *self = _PyExecutorObject_CAST(op); _PyObject_GC_UNTRACK(self); assert(self->vm_data.code == NULL); unlink_executor(self); -#ifdef _Py_JIT - _PyJIT_Free(self); -#endif - PyObject_GC_Del(self); + add_to_pending_deletion_list(self); } const char * diff --git a/Python/pystate.c b/Python/pystate.c index 5685957b160dba..1ac134400856d4 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -678,6 +678,8 @@ init_interpreter(PyInterpreterState *interp, interp->sys_trace_initialized = false; interp->jit = false; interp->executor_list_head = NULL; + interp->executor_deletion_list_head = NULL; + interp->executor_deletion_list_remaining_capacity = 0; interp->trace_run_counter = JIT_CLEANUP_THRESHOLD; if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ @@ -902,6 +904,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->after_forkers_child); #endif + +#ifdef _Py_TIER2 + _Py_ClearExecutorDeletionList(interp); +#endif _PyAST_Fini(interp); _PyWarnings_Fini(interp); _PyAtExit_Fini(interp); @@ -1570,7 +1576,7 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->datastack_top = NULL; tstate->datastack_limit = NULL; tstate->what_event = -1; - tstate->previous_executor = NULL; + tstate->current_executor = NULL; tstate->dict_global_version = 0; _tstate->c_stack_soft_limit = UINTPTR_MAX; From 46ef280f8b56011e36debc872c9c0458caa60f77 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 2 May 2025 11:29:18 +0100 Subject: [PATCH 2/6] Handle tail-calling interpreter --- Python/bytecodes.c | 2 ++ Python/ceval.c | 20 ++++++++++++++------ Python/generated_cases.c.h | 2 ++ Python/optimizer.c | 2 ++ 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2fbf61007cdf6c..a10b5884364ce9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1169,12 +1169,14 @@ dummy_func( tstate->current_frame = frame->previous; assert(!_PyErr_Occurred(tstate)); PyObject *result = PyStackRef_AsPyObjectSteal(retval); +#if !Py_TAIL_CALL_INTERP && defined(_Py_TIER2) _PyStackRef executor = entry.frame.localsplus[0]; assert(tstate->current_executor == NULL); if (!PyStackRef_IsNull(executor)) { tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); PyStackRef_CLOSE(executor); } +#endif LLTRACE_RESUME_FRAME(); return result; } diff --git a/Python/ceval.c b/Python/ceval.c index c4f91d5654b9b7..94f6be9cfea5d7 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1050,14 +1050,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry.frame.previous = tstate->current_frame; frame->previous = &entry.frame; tstate->current_frame = frame; - + entry.frame.localsplus[0] = PyStackRef_NULL; +#ifdef _Py_TIER2 if (tstate->current_executor != NULL) { entry.frame.localsplus[0] = PyStackRef_FromPyObjectNew(tstate->current_executor); tstate->current_executor = NULL; } - else { - entry.frame.localsplus[0] = PyStackRef_NULL; - } +#endif /* support for generator.throw() */ if (throwflag) { @@ -1084,10 +1083,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int stack_pointer = _PyFrame_GetStackPointer(frame); #if Py_TAIL_CALL_INTERP # if Py_STATS - return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode); + PyObject *res = _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode); # else - return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0); + PyObject *res = _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0); +# endif +# ifdef _Py_TIER2 + _PyStackRef executor = entry.frame.localsplus[0]; + assert(tstate->current_executor == NULL); + if (!PyStackRef_IsNull(executor)) { + tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); + PyStackRef_CLOSE(executor); + } # endif + return res; #else goto error; #endif diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1a217d44f3e56f..536202578e9b33 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7629,6 +7629,7 @@ tstate->current_frame = frame->previous; assert(!_PyErr_Occurred(tstate)); PyObject *result = PyStackRef_AsPyObjectSteal(retval); + #if !Py_TAIL_CALL_INTERP && defined(_Py_TIER2) _PyStackRef executor = entry.frame.localsplus[0]; assert(tstate->current_executor == NULL); if (!PyStackRef_IsNull(executor)) { @@ -7640,6 +7641,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += 1; } + #endif LLTRACE_RESUME_FRAME(); return result; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 2ca3898bf85d7a..b18856889f06ef 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -267,6 +267,8 @@ uop_dealloc(PyObject *op) { _PyObject_GC_UNTRACK(self); assert(self->vm_data.code == NULL); unlink_executor(self); + // Once unlinked it becomes impossible to invalidate an executor, so do it here. + self->vm_data.valid = 0; add_to_pending_deletion_list(self); } From 2f42cb081821e992112da96fbacddf04fe63567a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 3 May 2025 09:34:18 +0100 Subject: [PATCH 3/6] Don't use entry.frame in tailcalling interpreter --- Python/bytecodes.c | 18 ++++++++++++++++-- Python/ceval.c | 13 ++----------- Python/generated_cases.c.h | 18 ++++++++++++++++-- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9f2020ab15e83e..279f1cfcdc66fd 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1169,8 +1169,11 @@ dummy_func( tstate->current_frame = frame->previous; assert(!_PyErr_Occurred(tstate)); PyObject *result = PyStackRef_AsPyObjectSteal(retval); -#if !Py_TAIL_CALL_INTERP && defined(_Py_TIER2) - _PyStackRef executor = entry.frame.localsplus[0]; +#if !Py_TAIL_CALL_INTERP + assert(frame == &entry.frame); +#endif +#ifdef _Py_TIER2 + _PyStackRef executor = frame->localsplus[0]; assert(tstate->current_executor == NULL); if (!PyStackRef_IsNull(executor)) { tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); @@ -5469,6 +5472,17 @@ dummy_func( if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { /* Restore previous frame and exit */ tstate->current_frame = frame->previous; +#if !Py_TAIL_CALL_INTERP + assert(frame == &entry.frame); +#endif +#ifdef _Py_TIER2 + _PyStackRef executor = frame->localsplus[0]; + assert(tstate->current_executor == NULL); + if (!PyStackRef_IsNull(executor)) { + tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); + PyStackRef_CLOSE(executor); + } +#endif return NULL; } next_instr = frame->instr_ptr; diff --git a/Python/ceval.c b/Python/ceval.c index 94f6be9cfea5d7..490b653f132a6a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1083,19 +1083,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int stack_pointer = _PyFrame_GetStackPointer(frame); #if Py_TAIL_CALL_INTERP # if Py_STATS - PyObject *res = _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode); + return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0, lastopcode); # else - PyObject *res = _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0); + return _TAIL_CALL_error(frame, stack_pointer, tstate, next_instr, 0); # endif -# ifdef _Py_TIER2 - _PyStackRef executor = entry.frame.localsplus[0]; - assert(tstate->current_executor == NULL); - if (!PyStackRef_IsNull(executor)) { - tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); - PyStackRef_CLOSE(executor); - } -# endif - return res; #else goto error; #endif diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c3a4a0ad33d841..f9aa22f1182f74 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7651,8 +7651,11 @@ tstate->current_frame = frame->previous; assert(!_PyErr_Occurred(tstate)); PyObject *result = PyStackRef_AsPyObjectSteal(retval); - #if !Py_TAIL_CALL_INTERP && defined(_Py_TIER2) - _PyStackRef executor = entry.frame.localsplus[0]; + #if !Py_TAIL_CALL_INTERP + assert(frame == &entry.frame); + #endif + #ifdef _Py_TIER2 + _PyStackRef executor = frame->localsplus[0]; assert(tstate->current_executor == NULL); if (!PyStackRef_IsNull(executor)) { tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); @@ -12436,6 +12439,17 @@ JUMP_TO_LABEL(error); frame->return_offset = 0; if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { tstate->current_frame = frame->previous; + #if !Py_TAIL_CALL_INTERP + assert(frame == &entry.frame); + #endif + #ifdef _Py_TIER2 + _PyStackRef executor = frame->localsplus[0]; + assert(tstate->current_executor == NULL); + if (!PyStackRef_IsNull(executor)) { + tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); + PyStackRef_CLOSE(executor); + } + #endif return NULL; } next_instr = frame->instr_ptr; From f938d68f6d525e7239ea7270488cdd3937af58b8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 3 May 2025 09:57:45 +0100 Subject: [PATCH 4/6] Address review comments --- Python/bytecodes.c | 6 ------ Python/ceval_macros.h | 7 +++++-- Python/executor_cases.c.h | 4 ---- Python/generated_cases.c.h | 2 -- Python/optimizer.c | 2 ++ Tools/jit/template.c | 5 ++++- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 279f1cfcdc66fd..5a52efaaec8042 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2924,7 +2924,6 @@ dummy_func( else { this_instr[1].counter = initial_jump_backoff_counter(); assert(tstate->current_executor == NULL); - tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); } } @@ -2989,7 +2988,6 @@ dummy_func( } DISPATCH_GOTO(); } - tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -5264,7 +5262,6 @@ dummy_func( exit->temperature = initial_temperature_backoff_counter(); Py_CLEAR(exit->executor); } - tstate->current_executor = NULL; if (exit->executor == NULL) { _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { @@ -5287,7 +5284,6 @@ dummy_func( } exit->executor = executor; } - tstate->current_executor = (PyObject *)exit->executor; GOTO_TIER_TWO(exit->executor); } @@ -5346,12 +5342,10 @@ dummy_func( } tier2 op(_DEOPT, (--)) { - tstate->current_executor = NULL; GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET()); } tier2 op(_ERROR_POP_N, (target/2 --)) { - tstate->current_executor = NULL; assert(oparg == 0); frame->instr_ptr = _PyFrame_GetBytecode(frame) + target; SYNC_SP(); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index e3852b7b55850e..eca574b5455dbc 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -359,7 +359,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer) do { \ OPT_STAT_INC(traces_executed); \ _PyExecutorObject *_executor = (EXECUTOR); \ - assert(tstate->current_executor == _executor); \ + tstate->current_executor = (PyObject *)_executor; \ jit_func jitted = _executor->jit_code; \ /* Keep the shim frame alive via the executor: */ \ Py_INCREF(_executor); \ @@ -378,7 +378,9 @@ do { \ #define GOTO_TIER_TWO(EXECUTOR) \ do { \ OPT_STAT_INC(traces_executed); \ - next_uop = (EXECUTOR)->trace; \ + _PyExecutorObject *_executor = (EXECUTOR); \ + tstate->current_executor = (PyObject *)_executor; \ + next_uop = _executor->trace; \ assert(next_uop->opcode == _START_EXECUTOR); \ goto enter_tier_two; \ } while (0) @@ -387,6 +389,7 @@ do { \ #define GOTO_TIER_ONE(TARGET) \ do \ { \ + tstate->current_executor = NULL; \ next_instr = (TARGET); \ assert(tstate->current_executor == NULL); \ OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c67e2c1f40855c..7f3c3141ad00b9 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -6969,7 +6969,6 @@ Py_CLEAR(exit->executor); stack_pointer = _PyFrame_GetStackPointer(frame); } - tstate->current_executor = NULL; if (exit->executor == NULL) { _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { @@ -6994,7 +6993,6 @@ } exit->executor = executor; } - tstate->current_executor = (PyObject *)exit->executor; GOTO_TIER_TWO(exit->executor); break; } @@ -7120,7 +7118,6 @@ } case _DEOPT: { - tstate->current_executor = NULL; GOTO_TIER_ONE(_PyFrame_GetBytecode(frame) + CURRENT_TARGET()); break; } @@ -7128,7 +7125,6 @@ case _ERROR_POP_N: { oparg = CURRENT_OPARG(); uint32_t target = (uint32_t)CURRENT_OPERAND0(); - tstate->current_executor = NULL; assert(oparg == 0); frame->instr_ptr = _PyFrame_GetBytecode(frame) + target; GOTO_TIER_ONE(NULL); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f9aa22f1182f74..ee54b385b7064e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5567,7 +5567,6 @@ } DISPATCH_GOTO(); } - tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); @@ -7802,7 +7801,6 @@ this_instr[1].counter = initial_jump_backoff_counter(); stack_pointer = _PyFrame_GetStackPointer(frame); assert(tstate->current_executor == NULL); - tstate->current_executor = (PyObject *)executor; GOTO_TIER_TWO(executor); } } diff --git a/Python/optimizer.c b/Python/optimizer.c index b18856889f06ef..dde3dd8ebe745a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -224,6 +224,8 @@ _Py_ClearExecutorDeletionList(PyInterpreterState *interp) while (ts) { _PyExecutorObject *current = (_PyExecutorObject *)ts->current_executor; if (current != NULL) { + /* Anything in this list will be unlinked, so we can reuse the + * linked field as a reachability marker. */ current->vm_data.linked = 1; } HEAD_LOCK(runtime); diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 68cf75942d85c9..5ee26f93f1e266 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -50,13 +50,16 @@ #define GOTO_TIER_TWO(EXECUTOR) \ do { \ OPT_STAT_INC(traces_executed); \ - jit_func_preserve_none jitted = (EXECUTOR)->jit_side_entry; \ + _PyExecutorObject *_executor = (EXECUTOR); \ + tstate->current_executor = (PyObject *)_executor; \ + jit_func_preserve_none jitted = _executor->jit_side_entry; \ __attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \ } while (0) #undef GOTO_TIER_ONE #define GOTO_TIER_ONE(TARGET) \ do { \ + tstate->current_executor = NULL; \ _PyFrame_SetStackPointer(frame, stack_pointer); \ return TARGET; \ } while (0) From a30e444e1f580dcb8c14478e84004df78123cacc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 3 May 2025 10:06:38 +0100 Subject: [PATCH 5/6] Fix compiler warning --- Include/internal/pycore_optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e756b2ffeb97ad..d3674726997f6a 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -69,7 +69,7 @@ typedef struct { typedef struct { uint32_t target; _Py_BackoffCounter temperature; - const struct _PyExecutorObject *executor; + struct _PyExecutorObject *executor; } _PyExitData; typedef struct _PyExecutorObject { From 0b9dcb7df0e7e6a7ad38fd05d8bfec5c1532cdd7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 4 May 2025 08:39:39 +0100 Subject: [PATCH 6/6] Remove redundant code --- Python/ceval_macros.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index eca574b5455dbc..187ec8fdd26584 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -365,7 +365,6 @@ do { \ Py_INCREF(_executor); \ next_instr = jitted(frame, stack_pointer, tstate); \ Py_DECREF(_executor); \ - tstate->current_executor = NULL; \ frame = tstate->current_frame; \ stack_pointer = _PyFrame_GetStackPointer(frame); \ if (next_instr == NULL) { \ pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy