diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index c88640777e3fb0..e94732b64384b5 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -39,12 +39,18 @@ #define _GUARD_TYPE_VERSION 317 #define _CHECK_MANAGED_OBJECT_HAS_VALUES 318 #define IS_NONE 319 -#define _ITER_CHECK_RANGE 320 -#define _ITER_EXHAUSTED_RANGE 321 -#define _ITER_NEXT_RANGE 322 -#define _POP_JUMP_IF_FALSE 323 -#define _POP_JUMP_IF_TRUE 324 -#define JUMP_TO_TOP 325 +#define _ITER_CHECK_LIST 320 +#define _IS_ITER_EXHAUSTED_LIST 321 +#define _ITER_NEXT_LIST 322 +#define _ITER_CHECK_TUPLE 323 +#define _IS_ITER_EXHAUSTED_TUPLE 324 +#define _ITER_NEXT_TUPLE 325 +#define _ITER_CHECK_RANGE 326 +#define _IS_ITER_EXHAUSTED_RANGE 327 +#define _ITER_NEXT_RANGE 328 +#define _POP_JUMP_IF_FALSE 329 +#define _POP_JUMP_IF_TRUE 330 +#define JUMP_TO_TOP 331 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -1323,8 +1329,14 @@ const char * const _PyOpcode_uop_name[512] = { [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES", [IS_NONE] = "IS_NONE", + [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", + [_IS_ITER_EXHAUSTED_LIST] = "_IS_ITER_EXHAUSTED_LIST", + [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST", + [_ITER_CHECK_TUPLE] = "_ITER_CHECK_TUPLE", + [_IS_ITER_EXHAUSTED_TUPLE] = "_IS_ITER_EXHAUSTED_TUPLE", + [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE", - [_ITER_EXHAUSTED_RANGE] = "_ITER_EXHAUSTED_RANGE", + [_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index abdf7ed8976350..43c04463236a2a 100644 --- a/Lib/test/test_capi/test_misc.py +++ 
b/Lib/test/test_capi/test_misc.py @@ -2590,7 +2590,6 @@ def testfunc(n): for i in range(n): total += i return total - # import dis; dis.dis(testfunc) opt = _testinternalcapi.get_uop_optimizer() with temporary_optimizer(opt): @@ -2602,7 +2601,51 @@ def testfunc(n): # for i, (opname, oparg) in enumerate(ex): # print(f"{i:4d}: {opname:<20s} {oparg:3d}") uops = {opname for opname, _ in ex} - self.assertIn("_ITER_EXHAUSTED_RANGE", uops) + self.assertIn("_IS_ITER_EXHAUSTED_RANGE", uops) + # Verification that the jump goes past END_FOR + # is done by manual inspection of the output + + def test_for_iter_list(self): + def testfunc(a): + total = 0 + for i in a: + total += i + return total + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + a = list(range(10)) + total = testfunc(a) + self.assertEqual(total, 45) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + # for i, (opname, oparg) in enumerate(ex): + # print(f"{i:4d}: {opname:<20s} {oparg:3d}") + uops = {opname for opname, _ in ex} + self.assertIn("_IS_ITER_EXHAUSTED_LIST", uops) + # Verification that the jump goes past END_FOR + # is done by manual inspection of the output + + def test_for_iter_tuple(self): + def testfunc(a): + total = 0 + for i in a: + total += i + return total + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + a = tuple(range(10)) + total = testfunc(a) + self.assertEqual(total, 45) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + # for i, (opname, oparg) in enumerate(ex): + # print(f"{i:4d}: {opname:<20s} {oparg:3d}") + uops = {opname for opname, _ in ex} + self.assertIn("_IS_ITER_EXHAUSTED_TUPLE", uops) # Verification that the jump goes past END_FOR # is done by manual inspection of the output diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1fe9970e53cdfe..15b48ae9d82672 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -17,6 +17,7 @@ #include "pycore_object.h" // 
_PyObject_GC_TRACK() #include "pycore_moduleobject.h" // PyModuleObject #include "pycore_opcode.h" // EXTRA_CASES +#include "pycore_opcode_metadata.h" // uop names #include "pycore_opcode_utils.h" // MAKE_FUNCTION_* #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -55,13 +56,14 @@ static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub; static PyObject *container, *start, *stop, *v, *lhs, *rhs, *res2; static PyObject *list, *tuple, *dict, *owner, *set, *str, *tup, *map, *keys; -static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter; +static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter, *exhausted; static PyObject *aiter, *awaitable, *iterable, *w, *exc_value, *bc, *locals; static PyObject *orig, *excs, *update, *b, *fromlist, *level, *from; static PyObject **pieces, **values; static size_t jump; // Dummy variables for cache effects static uint16_t invert, counter, index, hint; +#define unused 0 // Used in a macro def, can't be static static uint32_t type_version; static PyObject * @@ -2418,52 +2420,108 @@ dummy_func( INSTRUMENTED_JUMP(here, target, PY_MONITORING_EVENT_BRANCH); } - inst(FOR_ITER_LIST, (unused/1, iter -- iter, next)) { + op(_ITER_CHECK_LIST, (iter -- iter)) { DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); + } + + op(_ITER_JUMP_LIST, (iter -- iter)) { _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); STAT_INC(FOR_ITER, hit); PyListObject *seq = it->it_seq; - if (seq) { - if (it->it_index < PyList_GET_SIZE(seq)) { - next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); - goto end_for_iter_list; // End of this instruction + if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) { + if (seq != NULL) { + it->it_seq = NULL; + Py_DECREF(seq); } - it->it_seq = NULL; - Py_DECREF(seq); + Py_DECREF(iter); + STACK_SHRINK(1); + SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); + /* Jump forward oparg, 
then skip following END_FOR instruction */ + JUMPBY(oparg + 1); + DISPATCH(); } - Py_DECREF(iter); - STACK_SHRINK(1); - SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); - DISPATCH(); - end_for_iter_list: - // Common case: no jump, leave it to the code generator } - inst(FOR_ITER_TUPLE, (unused/1, iter -- iter, next)) { + // Only used by Tier 2 + op(_IS_ITER_EXHAUSTED_LIST, (iter -- iter, exhausted)) { + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + PyListObject *seq = it->it_seq; + if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) { + exhausted = Py_True; + } + else { + exhausted = Py_False; + } + } + + op(_ITER_NEXT_LIST, (iter -- iter, next)) { + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + PyListObject *seq = it->it_seq; + assert(seq); + assert(it->it_index < PyList_GET_SIZE(seq)); + next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); + } + + macro(FOR_ITER_LIST) = + unused/1 + // Skip over the counter + _ITER_CHECK_LIST + + _ITER_JUMP_LIST + + _ITER_NEXT_LIST; + + op(_ITER_CHECK_TUPLE, (iter -- iter)) { + DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER); + } + + op(_ITER_JUMP_TUPLE, (iter -- iter)) { _PyTupleIterObject *it = (_PyTupleIterObject *)iter; - DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER); + assert(Py_TYPE(iter) == &PyTupleIter_Type); STAT_INC(FOR_ITER, hit); PyTupleObject *seq = it->it_seq; - if (seq) { - if (it->it_index < PyTuple_GET_SIZE(seq)) { - next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); - goto end_for_iter_tuple; // End of this instruction + if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { + if (seq != NULL) { + it->it_seq = NULL; + Py_DECREF(seq); } - it->it_seq = NULL; - Py_DECREF(seq); + Py_DECREF(iter); + STACK_SHRINK(1); + SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); + /* Jump forward oparg, then skip 
following END_FOR instruction */ + JUMPBY(oparg + 1); + DISPATCH(); } - Py_DECREF(iter); - STACK_SHRINK(1); - SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); - DISPATCH(); - end_for_iter_tuple: - // Common case: no jump, leave it to the code generator } + // Only used by Tier 2 + op(_IS_ITER_EXHAUSTED_TUPLE, (iter -- iter, exhausted)) { + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + PyTupleObject *seq = it->it_seq; + if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { + exhausted = Py_True; + } + else { + exhausted = Py_False; + } + } + + op(_ITER_NEXT_TUPLE, (iter -- iter, next)) { + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + PyTupleObject *seq = it->it_seq; + assert(seq); + assert(it->it_index < PyTuple_GET_SIZE(seq)); + next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); + } + + macro(FOR_ITER_TUPLE) = + unused/1 + // Skip over the counter + _ITER_CHECK_TUPLE + + _ITER_JUMP_TUPLE + + _ITER_NEXT_TUPLE; + op(_ITER_CHECK_RANGE, (iter -- iter)) { _PyRangeIterObject *r = (_PyRangeIterObject *)iter; DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER); @@ -2484,7 +2542,7 @@ dummy_func( } // Only used by Tier 2 - op(_ITER_EXHAUSTED_RANGE, (iter -- iter, exhausted)) { + op(_IS_ITER_EXHAUSTED_RANGE, (iter -- iter, exhausted)) { _PyRangeIterObject *r = (_PyRangeIterObject *)iter; assert(Py_TYPE(r) == &PyRangeIter_Type); exhausted = r->len <= 0 ? 
Py_True : Py_False; @@ -2502,7 +2560,10 @@ dummy_func( } macro(FOR_ITER_RANGE) = - unused/1 + _ITER_CHECK_RANGE + _ITER_JUMP_RANGE + _ITER_NEXT_RANGE; + unused/1 + // Skip over the counter + _ITER_CHECK_RANGE + + _ITER_JUMP_RANGE + + _ITER_NEXT_RANGE; inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) { DEOPT_IF(tstate->interp->eval_frame, FOR_ITER); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ce54755d5d25f1..626baece814607 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1738,6 +1738,80 @@ break; } + case _ITER_CHECK_LIST: { + PyObject *iter = stack_pointer[-1]; + DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); + break; + } + + case _IS_ITER_EXHAUSTED_LIST: { + PyObject *iter = stack_pointer[-1]; + PyObject *exhausted; + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + PyListObject *seq = it->it_seq; + if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) { + exhausted = Py_True; + } + else { + exhausted = Py_False; + } + STACK_GROW(1); + stack_pointer[-1] = exhausted; + break; + } + + case _ITER_NEXT_LIST: { + PyObject *iter = stack_pointer[-1]; + PyObject *next; + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + PyListObject *seq = it->it_seq; + assert(seq); + assert(it->it_index < PyList_GET_SIZE(seq)); + next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); + STACK_GROW(1); + stack_pointer[-1] = next; + break; + } + + case _ITER_CHECK_TUPLE: { + PyObject *iter = stack_pointer[-1]; + DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER); + break; + } + + case _IS_ITER_EXHAUSTED_TUPLE: { + PyObject *iter = stack_pointer[-1]; + PyObject *exhausted; + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + PyTupleObject *seq = it->it_seq; + if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { + exhausted = Py_True; + } + else { + exhausted = 
Py_False; + } + STACK_GROW(1); + stack_pointer[-1] = exhausted; + break; + } + + case _ITER_NEXT_TUPLE: { + PyObject *iter = stack_pointer[-1]; + PyObject *next; + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + PyTupleObject *seq = it->it_seq; + assert(seq); + assert(it->it_index < PyTuple_GET_SIZE(seq)); + next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); + STACK_GROW(1); + stack_pointer[-1] = next; + break; + } + case _ITER_CHECK_RANGE: { PyObject *iter = stack_pointer[-1]; _PyRangeIterObject *r = (_PyRangeIterObject *)iter; @@ -1745,7 +1819,7 @@ break; } - case _ITER_EXHAUSTED_RANGE: { + case _IS_ITER_EXHAUSTED_RANGE: { PyObject *iter = stack_pointer[-1]; PyObject *exhausted; _PyRangeIterObject *r = (_PyRangeIterObject *)iter; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d43c7386bd6f6d..68531dc074769e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3051,60 +3051,96 @@ } TARGET(FOR_ITER_LIST) { - PyObject *iter = stack_pointer[-1]; - PyObject *next; - DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); - _PyListIterObject *it = (_PyListIterObject *)iter; - STAT_INC(FOR_ITER, hit); - PyListObject *seq = it->it_seq; - if (seq) { - if (it->it_index < PyList_GET_SIZE(seq)) { - next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); - goto end_for_iter_list; // End of this instruction + PyObject *_tmp_1; + PyObject *_tmp_2 = stack_pointer[-1]; + { + PyObject *iter = _tmp_2; + DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); + _tmp_2 = iter; + } + { + PyObject *iter = _tmp_2; + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + STAT_INC(FOR_ITER, hit); + PyListObject *seq = it->it_seq; + if (seq == NULL || it->it_index >= PyList_GET_SIZE(seq)) { + if (seq != NULL) { + it->it_seq = NULL; + Py_DECREF(seq); + } + Py_DECREF(iter); + STACK_SHRINK(1); + SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); + /* 
Jump forward oparg, then skip following END_FOR instruction */ + JUMPBY(oparg + 1); + DISPATCH(); } - it->it_seq = NULL; - Py_DECREF(seq); + _tmp_2 = iter; + } + { + PyObject *iter = _tmp_2; + PyObject *next; + _PyListIterObject *it = (_PyListIterObject *)iter; + assert(Py_TYPE(iter) == &PyListIter_Type); + PyListObject *seq = it->it_seq; + assert(seq); + assert(it->it_index < PyList_GET_SIZE(seq)); + next = Py_NewRef(PyList_GET_ITEM(seq, it->it_index++)); + _tmp_2 = iter; + _tmp_1 = next; } - Py_DECREF(iter); - STACK_SHRINK(1); - SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); - DISPATCH(); - end_for_iter_list: - // Common case: no jump, leave it to the code generator - STACK_GROW(1); - stack_pointer[-1] = next; next_instr += 1; + STACK_GROW(1); + stack_pointer[-1] = _tmp_1; + stack_pointer[-2] = _tmp_2; DISPATCH(); } TARGET(FOR_ITER_TUPLE) { - PyObject *iter = stack_pointer[-1]; - PyObject *next; - _PyTupleIterObject *it = (_PyTupleIterObject *)iter; - DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER); - STAT_INC(FOR_ITER, hit); - PyTupleObject *seq = it->it_seq; - if (seq) { - if (it->it_index < PyTuple_GET_SIZE(seq)) { - next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); - goto end_for_iter_tuple; // End of this instruction + PyObject *_tmp_1; + PyObject *_tmp_2 = stack_pointer[-1]; + { + PyObject *iter = _tmp_2; + DEOPT_IF(Py_TYPE(iter) != &PyTupleIter_Type, FOR_ITER); + _tmp_2 = iter; + } + { + PyObject *iter = _tmp_2; + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + STAT_INC(FOR_ITER, hit); + PyTupleObject *seq = it->it_seq; + if (seq == NULL || it->it_index >= PyTuple_GET_SIZE(seq)) { + if (seq != NULL) { + it->it_seq = NULL; + Py_DECREF(seq); + } + Py_DECREF(iter); + STACK_SHRINK(1); + SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); + /* Jump forward oparg, then skip following END_FOR instruction */ + JUMPBY(oparg + 
1); + DISPATCH(); } - it->it_seq = NULL; - Py_DECREF(seq); + _tmp_2 = iter; + } + { + PyObject *iter = _tmp_2; + PyObject *next; + _PyTupleIterObject *it = (_PyTupleIterObject *)iter; + assert(Py_TYPE(iter) == &PyTupleIter_Type); + PyTupleObject *seq = it->it_seq; + assert(seq); + assert(it->it_index < PyTuple_GET_SIZE(seq)); + next = Py_NewRef(PyTuple_GET_ITEM(seq, it->it_index++)); + _tmp_2 = iter; + _tmp_1 = next; } - Py_DECREF(iter); - STACK_SHRINK(1); - SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); - DISPATCH(); - end_for_iter_tuple: - // Common case: no jump, leave it to the code generator - STACK_GROW(1); - stack_pointer[-1] = next; next_instr += 1; + STACK_GROW(1); + stack_pointer[-1] = _tmp_1; + stack_pointer[-2] = _tmp_2; DISPATCH(); } diff --git a/Python/optimizer.c b/Python/optimizer.c index abd2351f6b78bd..289b202f806ae1 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -378,6 +378,7 @@ translate_bytecode_to_trace( _Py_CODEUNIT *initial_instr = instr; int trace_length = 0; int max_length = buffer_size; + int reserved = 0; #ifdef Py_DEBUG char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); @@ -385,6 +386,9 @@ translate_bytecode_to_trace( if (uop_debug != NULL && *uop_debug >= '0') { lltrace = *uop_debug - '0'; // TODO: Parse an int and all that } +#endif + +#ifdef Py_DEBUG #define DPRINTF(level, ...) 
\ if (lltrace >= (level)) { fprintf(stderr, __VA_ARGS__); } #else @@ -397,6 +401,8 @@ translate_bytecode_to_trace( uop_name(OPCODE), \ (uint64_t)(OPERAND)); \ assert(trace_length < max_length); \ + assert(reserved > 0); \ + reserved--; \ trace[trace_length].opcode = (OPCODE); \ trace[trace_length].operand = (OPERAND); \ trace_length++; @@ -409,9 +415,23 @@ translate_bytecode_to_trace( (INDEX), \ uop_name(OPCODE), \ (uint64_t)(OPERAND)); \ + assert(reserved > 0); \ + reserved--; \ trace[(INDEX)].opcode = (OPCODE); \ trace[(INDEX)].operand = (OPERAND); +// Reserve space for n uops +#define RESERVE_RAW(n, opname) \ + if (trace_length + (n) > max_length) { \ + DPRINTF(2, "No room for %s (need %d, got %d)\n", \ + (opname), (n), max_length - trace_length); \ + goto done; \ + } \ + reserved = (n); // Keep ADD_TO_TRACE / ADD_TO_STUB honest + +// Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE +#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode)) + DPRINTF(4, "Optimizing %s (%s:%d) at byte offset %ld\n", PyUnicode_AsUTF8(code->co_qualname), @@ -420,16 +440,20 @@ translate_bytecode_to_trace( 2 * INSTR_IP(initial_instr, code)); for (;;) { + RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code)); + int opcode = instr->op.code; int oparg = instr->op.arg; int extras = 0; + while (opcode == EXTENDED_ARG) { instr++; extras += 1; opcode = instr->op.code; oparg = (oparg << 8) | instr->op.arg; } + if (opcode == ENTER_EXECUTOR) { _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; @@ -437,17 +461,14 @@ translate_bytecode_to_trace( DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); oparg = (oparg & 0xffffff00) | executor->vm_data.oparg; } + switch (opcode) { case POP_JUMP_IF_FALSE: case POP_JUMP_IF_TRUE: { // Assume jump unlikely (TODO: handle jump likely case) - // Reserve 5 entries (1 here, 2 stub, plus SAVE_IP + 
EXIT_TRACE) - if (trace_length + 5 > max_length) { - DPRINTF(1, "Ran out of space for POP_JUMP_IF_FALSE\n"); - goto done; - } + RESERVE(1, 2); _Py_CODEUNIT *target_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg; max_length -= 2; // Really the start of the stubs @@ -461,9 +482,8 @@ translate_bytecode_to_trace( case JUMP_BACKWARD: { - if (instr + 2 - oparg == initial_instr - && trace_length + 3 <= max_length) - { + if (instr + 2 - oparg == initial_instr) { + RESERVE(1, 0); ADD_TO_TRACE(JUMP_TO_TOP, 0); } else { @@ -474,26 +494,45 @@ translate_bytecode_to_trace( case JUMP_FORWARD: { + RESERVE(0, 0); // This will emit two SAVE_IP instructions; leave it to the optimizer instr += oparg; break; } + case FOR_ITER_LIST: + case FOR_ITER_TUPLE: case FOR_ITER_RANGE: { - // Assume jump unlikely (can a for-loop exit be likely?) - // Reserve 9 entries (4 here, 3 stub, plus SAVE_IP + EXIT_TRACE) - if (trace_length + 9 > max_length) { - DPRINTF(1, "Ran out of space for FOR_ITER_RANGE\n"); - goto done; + RESERVE(4, 3); + int check_op, exhausted_op, next_op; + switch (opcode) { + case FOR_ITER_LIST: + check_op = _ITER_CHECK_LIST; + exhausted_op = _IS_ITER_EXHAUSTED_LIST; + next_op = _ITER_NEXT_LIST; + break; + case FOR_ITER_TUPLE: + check_op = _ITER_CHECK_TUPLE; + exhausted_op = _IS_ITER_EXHAUSTED_TUPLE; + next_op = _ITER_NEXT_TUPLE; + break; + case FOR_ITER_RANGE: + check_op = _ITER_CHECK_RANGE; + exhausted_op = _IS_ITER_EXHAUSTED_RANGE; + next_op = _ITER_NEXT_RANGE; + break; + default: + Py_UNREACHABLE(); } + // Assume jump unlikely (can a for-loop exit be likely?) 
_Py_CODEUNIT *target_instr = // +1 at the end skips over END_FOR instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg + 1; max_length -= 3; // Really the start of the stubs - ADD_TO_TRACE(_ITER_CHECK_RANGE, 0); - ADD_TO_TRACE(_ITER_EXHAUSTED_RANGE, 0); + ADD_TO_TRACE(check_op, 0); + ADD_TO_TRACE(exhausted_op, 0); ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length); - ADD_TO_TRACE(_ITER_NEXT_RANGE, 0); + ADD_TO_TRACE(next_op, 0); ADD_TO_STUB(max_length + 0, POP_TOP, 0); ADD_TO_STUB(max_length + 1, SAVE_IP, INSTR_IP(target_instr, code)); @@ -507,10 +546,7 @@ translate_bytecode_to_trace( if (expansion->nuops > 0) { // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE) int nuops = expansion->nuops; - if (trace_length + nuops + 2 > max_length) { - DPRINTF(1, "Ran out of space for %s\n", uop_name(opcode)); - goto done; - } + RESERVE(nuops, 0); for (int i = 0; i < nuops; i++) { uint64_t operand; int offset = expansion->uops[i].offset; @@ -556,12 +592,14 @@ translate_bytecode_to_trace( } DPRINTF(2, "Unsupported opcode %s\n", uop_name(opcode)); goto done; // Break out of loop - } - } + } // End default + + } // End switch (opcode) + instr++; // Add cache size for opcode instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; - } + } // End for (;;) done: // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE @@ -610,6 +648,9 @@ translate_bytecode_to_trace( } return 0; +#undef RESERVE +#undef RESERVE_RAW +#undef INSTR_IP #undef ADD_TO_TRACE #undef DPRINTF } pFad - Phonifier reborn
