diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h index 55378e3dfebf24..5d9342b562b002 100644 --- a/Include/internal/pycore_call.h +++ b/Include/internal/pycore_call.h @@ -116,6 +116,16 @@ _PyObject_FastCallTstate(PyThreadState *tstate, PyObject *func, PyObject *const return _PyObject_VectorcallTstate(tstate, func, args, (size_t)nargs, NULL); } +PyObject *const * +_PyStack_UnpackDict(PyThreadState *tstate, + PyObject *const *args, Py_ssize_t nargs, + PyObject *kwargs, PyObject **p_kwnames); + +void +_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs, + PyObject *kwnames); + +void _PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames); #ifdef __cplusplus } diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst new file mode 100644 index 00000000000000..f9e71bc1344bb3 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-23-32-11.gh-issue-98003.xWE0Yu.rst @@ -0,0 +1,3 @@ +Complex function calls are now faster and consume no C stack +space. + diff --git a/Objects/call.c b/Objects/call.c index bd027e41f8a9a5..cf6e357a990441 100644 --- a/Objects/call.c +++ b/Objects/call.c @@ -8,16 +8,6 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() -static PyObject *const * -_PyStack_UnpackDict(PyThreadState *tstate, - PyObject *const *args, Py_ssize_t nargs, - PyObject *kwargs, PyObject **p_kwnames); - -static void -_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs, - PyObject *kwnames); - - static PyObject * null_error(PyThreadState *tstate) { @@ -965,7 +955,7 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames) The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET. When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */ -static PyObject *const * +PyObject *const * _PyStack_UnpackDict(PyThreadState *tstate, PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs, PyObject **p_kwnames) @@ -1034,7 +1024,7 @@ _PyStack_UnpackDict(PyThreadState *tstate, return stack; } -static void +void _PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs, PyObject *kwnames) { @@ -1042,6 +1032,12 @@ _PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs, for (Py_ssize_t i = 0; i < n; i++) { Py_DECREF(stack[i]); } + _PyStack_UnpackDict_FreeNoDecRef(stack, kwnames); +} + +void +_PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames) +{ PyMem_Free((PyObject **)stack - 1); Py_DECREF(kwnames); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9de0d92e382d3d..e83894e8902872 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3103,6 +3103,25 @@ dummy_func( } } else { + if (Py_TYPE(func) == &PyFunction_Type && + tstate->interp->eval_frame == NULL && + ((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) { + assert(PyTuple_CheckExact(callargs)); + Py_ssize_t nargs = PyTuple_GET_SIZE(callargs); + int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags; + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func)); + + _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate, + (PyFunctionObject *)func, locals, + nargs, callargs, kwargs); + // Need to manually shrink the stack since we exit with DISPATCH_INLINED. + STACK_SHRINK(oparg + 3); + if (new_frame == NULL) { + goto error; + } + frame->return_offset = 0; + DISPATCH_INLINED(new_frame); + } result = PyObject_Call(func, callargs, kwargs); } DECREF_INPUTS(); diff --git a/Python/ceval.c b/Python/ceval.c index 5d5221b2e40990..958689debc87f8 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -212,6 +212,9 @@ static _PyInterpreterFrame * _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals, PyObject* const* args, size_t argcount, PyObject *kwnames); +static _PyInterpreterFrame * +_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func, + PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs); static void _PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); @@ -1501,6 +1504,49 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, return NULL; } +/* Same as _PyEvalFramePushAndInit but takes an args tuple and kwargs dict. + Steals references to func, callargs and kwargs. +*/ +static _PyInterpreterFrame * +_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func, + PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs) +{ + bool has_dict = (kwargs != NULL && PyDict_GET_SIZE(kwargs) > 0); + PyObject *kwnames = NULL; + PyObject *const *newargs; + if (has_dict) { + newargs = _PyStack_UnpackDict(tstate, _PyTuple_ITEMS(callargs), nargs, kwargs, &kwnames); + if (newargs == NULL) { + Py_DECREF(func); + goto error; + } + } + else { + newargs = &PyTuple_GET_ITEM(callargs, 0); + /* We need to incref all our args since the new frame steals the references. */ + for (Py_ssize_t i = 0; i < nargs; ++i) { + Py_INCREF(PyTuple_GET_ITEM(callargs, i)); + } + } + _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit( + tstate, (PyFunctionObject *)func, locals, + newargs, nargs, kwnames + ); + if (has_dict) { + _PyStack_UnpackDict_FreeNoDecRef(newargs, kwnames); + } + /* No need to decref func here because the reference has been stolen by + _PyEvalFramePushAndInit. + */ + Py_DECREF(callargs); + Py_XDECREF(kwargs); + return new_frame; +error: + Py_DECREF(callargs); + Py_XDECREF(kwargs); + return NULL; +} + PyObject * _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, PyObject *locals, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 864a4f7bcaff0f..069a7ced0a4c25 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4296,16 +4296,35 @@ } } else { + if (Py_TYPE(func) == &PyFunction_Type && + tstate->interp->eval_frame == NULL && + ((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) { + assert(PyTuple_CheckExact(callargs)); + Py_ssize_t nargs = PyTuple_GET_SIZE(callargs); + int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags; + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func)); + + _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate, + (PyFunctionObject *)func, locals, + nargs, callargs, kwargs); + // Need to manually shrink the stack since we exit with DISPATCH_INLINED. + STACK_SHRINK(oparg + 3); + if (new_frame == NULL) { + goto error; + } + frame->return_offset = 0; + DISPATCH_INLINED(new_frame); + } result = PyObject_Call(func, callargs, kwargs); } - #line 4302 "Python/generated_cases.c.h" + #line 4321 "Python/generated_cases.c.h" Py_DECREF(func); Py_DECREF(callargs); Py_XDECREF(kwargs); - #line 3109 "Python/bytecodes.c" + #line 3128 "Python/bytecodes.c" assert(PEEK(3 + (oparg & 1)) == NULL); if (result == NULL) { STACK_SHRINK(((oparg & 1) ? 1 : 0)); goto pop_3_error; } - #line 4309 "Python/generated_cases.c.h" + #line 4328 "Python/generated_cases.c.h" STACK_SHRINK(((oparg & 1) ? 1 : 0)); STACK_SHRINK(2); stack_pointer[-1] = result; @@ -4320,7 +4339,7 @@ PyObject *kwdefaults = (oparg & 0x02) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0))] : NULL; PyObject *defaults = (oparg & 0x01) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x01) ? 1 : 0))] : NULL; PyObject *func; - #line 3119 "Python/bytecodes.c" + #line 3138 "Python/bytecodes.c" PyFunctionObject *func_obj = (PyFunctionObject *) PyFunction_New(codeobj, GLOBALS()); @@ -4349,14 +4368,14 @@ func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; func = (PyObject *)func_obj; - #line 4353 "Python/generated_cases.c.h" + #line 4372 "Python/generated_cases.c.h" STACK_SHRINK(((oparg & 0x01) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x08) ? 1 : 0)); stack_pointer[-1] = func; DISPATCH(); } TARGET(RETURN_GENERATOR) { - #line 3150 "Python/bytecodes.c" + #line 3169 "Python/bytecodes.c" assert(PyFunction_Check(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); @@ -4377,7 +4396,7 @@ frame = cframe.current_frame = prev; _PyFrame_StackPush(frame, (PyObject *)gen); goto resume_frame; - #line 4381 "Python/generated_cases.c.h" + #line 4400 "Python/generated_cases.c.h" } TARGET(BUILD_SLICE) { @@ -4385,15 +4404,15 @@ PyObject *stop = stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))]; PyObject *start = stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))]; PyObject *slice; - #line 3173 "Python/bytecodes.c" + #line 3192 "Python/bytecodes.c" slice = PySlice_New(start, stop, step); - #line 4391 "Python/generated_cases.c.h" + #line 4410 "Python/generated_cases.c.h" Py_DECREF(start); Py_DECREF(stop); Py_XDECREF(step); - #line 3175 "Python/bytecodes.c" + #line 3194 "Python/bytecodes.c" if (slice == NULL) { STACK_SHRINK(((oparg == 3) ? 1 : 0)); goto pop_2_error; } - #line 4397 "Python/generated_cases.c.h" + #line 4416 "Python/generated_cases.c.h" STACK_SHRINK(((oparg == 3) ? 1 : 0)); STACK_SHRINK(1); stack_pointer[-1] = slice; @@ -4404,7 +4423,7 @@ PyObject *fmt_spec = ((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? stack_pointer[-((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))] : NULL; PyObject *value = stack_pointer[-(1 + (((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))]; PyObject *result; - #line 3179 "Python/bytecodes.c" + #line 3198 "Python/bytecodes.c" /* Handles f-string value formatting. */ PyObject *(*conv_fn)(PyObject *); int which_conversion = oparg & FVC_MASK; @@ -4439,7 +4458,7 @@ Py_DECREF(value); Py_XDECREF(fmt_spec); if (result == NULL) { STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); goto pop_1_error; } - #line 4443 "Python/generated_cases.c.h" + #line 4462 "Python/generated_cases.c.h" STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); stack_pointer[-1] = result; DISPATCH(); @@ -4448,10 +4467,10 @@ TARGET(COPY) { PyObject *bottom = stack_pointer[-(1 + (oparg-1))]; PyObject *top; - #line 3216 "Python/bytecodes.c" + #line 3235 "Python/bytecodes.c" assert(oparg > 0); top = Py_NewRef(bottom); - #line 4455 "Python/generated_cases.c.h" + #line 4474 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = top; DISPATCH(); @@ -4463,7 +4482,7 @@ PyObject *rhs = stack_pointer[-1]; PyObject *lhs = stack_pointer[-2]; PyObject *res; - #line 3221 "Python/bytecodes.c" + #line 3240 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -4478,12 +4497,12 @@ assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops)); assert(binary_ops[oparg]); res = binary_ops[oparg](lhs, rhs); - #line 4482 "Python/generated_cases.c.h" + #line 4501 "Python/generated_cases.c.h" Py_DECREF(lhs); Py_DECREF(rhs); - #line 3236 "Python/bytecodes.c" + #line 3255 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 4487 "Python/generated_cases.c.h" + #line 4506 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -4493,16 +4512,16 @@ TARGET(SWAP) { PyObject *top = stack_pointer[-1]; PyObject *bottom = stack_pointer[-(2 + (oparg-2))]; - #line 3241 "Python/bytecodes.c" + #line 3260 "Python/bytecodes.c" assert(oparg >= 2); - #line 4499 "Python/generated_cases.c.h" + #line 4518 "Python/generated_cases.c.h" stack_pointer[-1] = bottom; stack_pointer[-(2 + (oparg-2))] = top; DISPATCH(); } TARGET(INSTRUMENTED_LINE) { - #line 3245 "Python/bytecodes.c" + #line 3264 "Python/bytecodes.c" _Py_CODEUNIT *here = next_instr-1; _PyFrame_SetStackPointer(frame, stack_pointer); int original_opcode = _Py_call_instrumentation_line( @@ -4522,11 +4541,11 @@ } opcode = original_opcode; DISPATCH_GOTO(); - #line 4526 "Python/generated_cases.c.h" + #line 4545 "Python/generated_cases.c.h" } TARGET(INSTRUMENTED_INSTRUCTION) { - #line 3267 "Python/bytecodes.c" + #line 3286 "Python/bytecodes.c" int next_opcode = _Py_call_instrumentation_instruction( tstate, frame, next_instr-1); if (next_opcode < 0) goto error; @@ -4538,26 +4557,26 @@ assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; DISPATCH_GOTO(); - #line 4542 "Python/generated_cases.c.h" + #line 4561 "Python/generated_cases.c.h" } TARGET(INSTRUMENTED_JUMP_FORWARD) { - #line 3281 "Python/bytecodes.c" + #line 3300 "Python/bytecodes.c" INSTRUMENTED_JUMP(next_instr-1, next_instr+oparg, PY_MONITORING_EVENT_JUMP); - #line 4548 "Python/generated_cases.c.h" + #line 4567 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_JUMP_BACKWARD) { - #line 3285 "Python/bytecodes.c" + #line 3304 "Python/bytecodes.c" INSTRUMENTED_JUMP(next_instr-1, next_instr-oparg, PY_MONITORING_EVENT_JUMP); - #line 4555 "Python/generated_cases.c.h" + #line 4574 "Python/generated_cases.c.h" CHECK_EVAL_BREAKER(); DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_TRUE) { - #line 3290 "Python/bytecodes.c" + #line 3309 "Python/bytecodes.c" PyObject *cond = POP(); int err = PyObject_IsTrue(cond); Py_DECREF(cond); @@ -4566,12 +4585,12 @@ assert(err == 0 || err == 1); int offset = err*oparg; INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4570 "Python/generated_cases.c.h" + #line 4589 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_FALSE) { - #line 3301 "Python/bytecodes.c" + #line 3320 "Python/bytecodes.c" PyObject *cond = POP(); int err = PyObject_IsTrue(cond); Py_DECREF(cond); @@ -4580,12 +4599,12 @@ assert(err == 0 || err == 1); int offset = (1-err)*oparg; INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4584 "Python/generated_cases.c.h" + #line 4603 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) { - #line 3312 "Python/bytecodes.c" + #line 3331 "Python/bytecodes.c" PyObject *value = POP(); _Py_CODEUNIT *here = next_instr-1; int offset; @@ -4598,12 +4617,12 @@ offset = 0; } INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4602 "Python/generated_cases.c.h" + #line 4621 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_NOT_NONE) { - #line 3327 "Python/bytecodes.c" + #line 3346 "Python/bytecodes.c" PyObject *value = POP(); _Py_CODEUNIT *here = next_instr-1; int offset; @@ -4616,30 +4635,30 @@ offset = oparg; } INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4620 "Python/generated_cases.c.h" + #line 4639 "Python/generated_cases.c.h" DISPATCH(); } TARGET(EXTENDED_ARG) { - #line 3342 "Python/bytecodes.c" + #line 3361 "Python/bytecodes.c" assert(oparg); opcode = next_instr->op.code; oparg = oparg << 8 | next_instr->op.arg; PRE_DISPATCH_GOTO(); DISPATCH_GOTO(); - #line 4631 "Python/generated_cases.c.h" + #line 4650 "Python/generated_cases.c.h" } TARGET(CACHE) { - #line 3350 "Python/bytecodes.c" + #line 3369 "Python/bytecodes.c" assert(0 && "Executing a cache."); Py_UNREACHABLE(); - #line 4638 "Python/generated_cases.c.h" + #line 4657 "Python/generated_cases.c.h" } TARGET(RESERVED) { - #line 3355 "Python/bytecodes.c" + #line 3374 "Python/bytecodes.c" assert(0 && "Executing RESERVED instruction."); Py_UNREACHABLE(); - #line 4645 "Python/generated_cases.c.h" + #line 4664 "Python/generated_cases.c.h" }
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: