From a08909dc765156a81adc296457f146909c068102 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 14:27:54 -0800 Subject: [PATCH 01/14] Add executor_cases.c.h dependency for ceval.o --- Makefile.pre.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.pre.in b/Makefile.pre.in index 2174ec3ac56158..3d766425abba34 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1609,6 +1609,7 @@ Python/ceval.o: \ $(srcdir)/Python/ceval_macros.h \ $(srcdir)/Python/condvar.h \ $(srcdir)/Python/generated_cases.c.h \ + $(srcdir)/Python/executor_cases.c.h \ $(srcdir)/Python/opcode_targets.h Python/flowgraph.o: \ From 4c2914bff6bd7b133ac5016fb2f2342a21a8871e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 10:46:06 -0800 Subject: [PATCH 02/14] Clean up flags.py --- Tools/cases_generator/flags.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 0066c9e74512c3..808c9e82bbce07 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -53,7 +53,7 @@ def makes_escaping_api_call(instr: parsing.InstDef) -> bool: if "CALL_INTRINSIC" in instr.name: - return True; + return True tkns = iter(instr.tokens) for tkn in tkns: if tkn.kind != lx.IDENTIFIER: @@ -79,6 +79,7 @@ def makes_escaping_api_call(instr: parsing.InstDef) -> bool: return True return False + @dataclasses.dataclass class InstructionFlags: """Construct and manipulate instruction flags""" @@ -124,9 +125,7 @@ def fromInstruction(instr: parsing.InstDef) -> "InstructionFlags": or variable_used(instr, "exception_unwind") or variable_used(instr, "resume_with_error") ), - HAS_ESCAPES_FLAG=( - makes_escaping_api_call(instr) - ), + HAS_ESCAPES_FLAG=makes_escaping_api_call(instr), ) @staticmethod From 053a0a22936e8885333751546a1d9e100b7b6486 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 10:47:50 -0800 Subject: [PATCH 03/14] Clean up parsing.py --- Tools/cases_generator/parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 49459be68ae5e8..d36bd52b022ea9 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -105,7 +105,7 @@ class OpName(Node): @dataclass class InstHeader(Node): - annotations : list[str] + annotations: list[str] kind: Literal["inst", "op"] name: str inputs: list[InputEffect] @@ -114,7 +114,7 @@ class InstHeader(Node): @dataclass class InstDef(Node): - annotations : list[str] + annotations: list[str] kind: Literal["inst", "op"] name: str inputs: list[InputEffect] From b838435c80dd6eb33c0d3e83bb11f3cd14dab66a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 15:32:13 -0800 Subject: [PATCH 04/14] Add back printing optimized uops --- Python/optimizer.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index e14ad89bbe2921..5d1ef8a683c250 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -325,7 +325,8 @@ uop_dealloc(_PyUOpExecutorObject *self) { } static const char * -uop_name(int index) { +uop_name(int index) +{ if (index <= MAX_REAL_OPCODE) { return _PyOpcode_OpName[index]; } @@ -832,6 +833,24 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) assert(dest == -1); executor->base.execute = _PyUopExecute; _Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); +#ifdef Py_DEBUG + char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); + int lltrace = 0; + if (python_lltrace != NULL && *python_lltrace >= '0') { + lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that + } + if (lltrace >= 2) { + printf("Optimized executor (length %d):\n", length); + for (int i = 0; i < length; i++) { + printf("%4d %s(%d, %d, %" PRIu64 ")\n", + i, + uop_name(executor->trace[i].opcode), + executor->trace[i].oparg, + executor->trace[i].target, + executor->trace[i].operand); + } + } +#endif return (_PyExecutorObject *)executor; } From b28effa9f7b43ed892cc31bb2872708d64ec0588 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 15:37:24 -0800 Subject: [PATCH 05/14] Hacky way to make FOR_ITER a viable uop --- Include/internal/pycore_opcode_metadata.h | 86 +++++++++++++---------- Python/abstract_interp_cases.c.h | 10 +++ Python/bytecodes.c | 29 +++++++- Python/executor_cases.c.h | 49 +++++++++++++ Python/optimizer.c | 1 + Tools/cases_generator/flags.py | 2 +- Tools/cases_generator/generate_cases.py | 2 +- Tools/cases_generator/instructions.py | 2 +- 8 files changed, 137 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 4d98b23df5d927..1442350411c90a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -81,45 +81,46 @@ #define _IS_NONE 353 #define _SPECIALIZE_FOR_ITER 354 #define _FOR_ITER 355 -#define _ITER_CHECK_LIST 356 -#define _ITER_JUMP_LIST 357 -#define _GUARD_NOT_EXHAUSTED_LIST 358 -#define _ITER_NEXT_LIST 359 -#define _ITER_CHECK_TUPLE 360 -#define _ITER_JUMP_TUPLE 361 -#define _GUARD_NOT_EXHAUSTED_TUPLE 362 -#define _ITER_NEXT_TUPLE 363 -#define _ITER_CHECK_RANGE 364 -#define _ITER_JUMP_RANGE 365 -#define _GUARD_NOT_EXHAUSTED_RANGE 366 -#define _ITER_NEXT_RANGE 367 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 368 -#define _GUARD_KEYS_VERSION 369 -#define _LOAD_ATTR_METHOD_WITH_VALUES 370 -#define _LOAD_ATTR_METHOD_NO_DICT 371 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 372 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 373 -#define _CHECK_ATTR_METHOD_LAZY_DICT 374 -#define _LOAD_ATTR_METHOD_LAZY_DICT 375 -#define _SPECIALIZE_CALL 376 -#define _CALL 377 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 378 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 379 -#define _CHECK_PEP_523 380 -#define _CHECK_FUNCTION_EXACT_ARGS 381 -#define _CHECK_STACK_SPACE 382 -#define _INIT_CALL_PY_EXACT_ARGS 383 -#define _PUSH_FRAME 384 -#define _SPECIALIZE_BINARY_OP 385 -#define _BINARY_OP 386 -#define _GUARD_IS_TRUE_POP 387 -#define _GUARD_IS_FALSE_POP 388 -#define _GUARD_IS_NONE_POP 389 -#define _GUARD_IS_NOT_NONE_POP 390 -#define _JUMP_TO_TOP 391 -#define _SAVE_RETURN_OFFSET 392 -#define _INSERT 393 -#define _CHECK_VALIDITY 394 +#define _FOR_ITER_TIER_TWO 356 +#define _ITER_CHECK_LIST 357 +#define _ITER_JUMP_LIST 358 +#define _GUARD_NOT_EXHAUSTED_LIST 359 +#define _ITER_NEXT_LIST 360 +#define _ITER_CHECK_TUPLE 361 +#define _ITER_JUMP_TUPLE 362 +#define _GUARD_NOT_EXHAUSTED_TUPLE 363 +#define _ITER_NEXT_TUPLE 364 +#define _ITER_CHECK_RANGE 365 +#define _ITER_JUMP_RANGE 366 +#define _GUARD_NOT_EXHAUSTED_RANGE 367 +#define _ITER_NEXT_RANGE 368 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 369 +#define _GUARD_KEYS_VERSION 370 +#define _LOAD_ATTR_METHOD_WITH_VALUES 371 +#define _LOAD_ATTR_METHOD_NO_DICT 372 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 373 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374 +#define _CHECK_ATTR_METHOD_LAZY_DICT 375 +#define _LOAD_ATTR_METHOD_LAZY_DICT 376 +#define _SPECIALIZE_CALL 377 +#define _CALL 378 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 379 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 380 +#define _CHECK_PEP_523 381 +#define _CHECK_FUNCTION_EXACT_ARGS 382 +#define _CHECK_STACK_SPACE 383 +#define _INIT_CALL_PY_EXACT_ARGS 384 +#define _PUSH_FRAME 385 +#define _SPECIALIZE_BINARY_OP 386 +#define _BINARY_OP 387 +#define _GUARD_IS_TRUE_POP 388 +#define _GUARD_IS_FALSE_POP 389 +#define _GUARD_IS_NONE_POP 390 +#define _GUARD_IS_NOT_NONE_POP 391 +#define _JUMP_TO_TOP 392 +#define _SAVE_RETURN_OFFSET 393 +#define _INSERT 394 +#define _CHECK_VALIDITY 395 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -543,6 +544,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 1; + case _FOR_ITER_TIER_TWO: + return 1; case FOR_ITER: return 1; case INSTRUMENTED_FOR_ITER: @@ -1181,6 +1184,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case _FOR_ITER: return 2; + case _FOR_ITER_TIER_TWO: + return 2; case FOR_ITER: return 2; case INSTRUMENTED_FOR_ITER: @@ -1676,6 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, @@ -1906,6 +1912,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [MATCH_KEYS] = { .nuops = 1, .uops = { { MATCH_KEYS, 0, 0 } } }, [GET_ITER] = { .nuops = 1, .uops = { { GET_ITER, 0, 0 } } }, [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, + [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 0, 0 } } }, [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 0, 0 }, { _ITER_NEXT_LIST, 0, 0 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 0, 0 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 0, 0 }, { _ITER_NEXT_RANGE, 0, 0 } } }, @@ -2005,6 +2012,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_IS_NONE] = "_IS_NONE", [_SPECIALIZE_FOR_ITER] = "_SPECIALIZE_FOR_ITER", [_FOR_ITER] = "_FOR_ITER", + [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_ITER_JUMP_LIST] = "_ITER_JUMP_LIST", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index a2f6aa8def8f69..28338f53ea7fb9 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -242,6 +242,10 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + break; + } + case _UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); @@ -624,6 +628,12 @@ break; } + case _FOR_ITER_TIER_TWO: { + STACK_GROW(1); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true); + break; + } + case _ITER_CHECK_LIST: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8a7dcb8416eb8c..da58cb75e607ed 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2368,7 +2368,7 @@ dummy_func( goto enter_tier_one; } - replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { + replaced op(_POP_JUMP_IF_FALSE, (unused/1, cond -- )) { assert(PyBool_Check(cond)); int flag = Py_IsFalse(cond); #if ENABLE_SPECIALIZATION @@ -2512,7 +2512,7 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_FOR_ITER, (iter -- iter, next)) { + replaced op(_FOR_ITER, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ next = (*Py_TYPE(iter)->tp_iternext)(iter); if (next == NULL) { @@ -2535,6 +2535,31 @@ dummy_func( // Common case: no jump, leave it to the code generator } + op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + } + macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER; inst(INSTRUMENTED_FOR_ITER, (unused/1 -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4e29fb9f0fa93d..4f2f73ee76d5ef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -866,6 +866,24 @@ break; } + case _SPECIALIZE_UNPACK_SEQUENCE: { + PyObject *seq; + seq = stack_pointer[-1]; + uint16_t counter = (uint16_t)operand; + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + next_instr = this_instr; + _Py_Specialize_UnpackSequence(seq, next_instr, oparg); + DISPATCH_SAME_OPARG(); + } + STAT_INC(UNPACK_SEQUENCE, deferred); + DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + #endif /* ENABLE_SPECIALIZATION */ + (void)seq; + (void)counter; + break; + } + case _UNPACK_SEQUENCE: { PyObject *seq; seq = stack_pointer[-1]; @@ -2101,6 +2119,37 @@ break; } + case _FOR_ITER_TIER_TWO: { + PyObject *iter; + PyObject *next; + iter = stack_pointer[-1]; + /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ + next = (*Py_TYPE(iter)->tp_iternext)(iter); + if (next == NULL) { + if (_PyErr_Occurred(tstate)) { + if (!_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) { + GOTO_ERROR(error); + } + _PyErr_Clear(tstate); + } + /* iterator ended normally */ + Py_DECREF(iter); + STACK_SHRINK(1); + /* HACK: Emulate DEOPT_IF to jump over END_FOR */ + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + assert(frame->instr_ptr[-1].op.code == END_FOR || + frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); + Py_DECREF(current_executor); + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + goto enter_tier_one; + } + // Common case: no jump, leave it to the code generator + STACK_GROW(1); + stack_pointer[-1] = next; + break; + } + case _ITER_CHECK_LIST: { PyObject *iter; iter = stack_pointer[-1]; diff --git a/Python/optimizer.c b/Python/optimizer.c index 5d1ef8a683c250..5c9f965aeefa85 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -392,6 +392,7 @@ _PyUop_Replacements[OPCODE_METADATA_SIZE] = { [_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE, [_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST, [_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE, + [_FOR_ITER] = _FOR_ITER_TIER_TWO, }; static const uint16_t diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 808c9e82bbce07..bf76112159e38e 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -175,7 +175,7 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool: tokens: list[lx.Token] = [] skipping = False for i, token in enumerate(node.tokens): - if token.kind == "MACRO": + if token.kind == "CMACRO": text = "".join(token.text.split()) # TODO: Handle nested #if if text == "#if": diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d1dbfeae8d74f6..ba45e3a625072e 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -658,7 +658,7 @@ def write_macro_expansions( if not part.instr.is_viable_uop() and "replaced" not in part.instr.annotations: # This note just reminds us about macros that cannot # be expanded to Tier 2 uops. It is not an error. - # It is sometimes emitted for macros that have a + # Suppress it using 'replaced op(...)' for macros having # manual translation in translate_bytecode_to_trace() # in Python/optimizer.c. if len(parts) > 1 or part.instr.name != name: diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 9039ac5c6f127e..457221a0e15f75 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -115,7 +115,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if "FRAME" in self.name: + if self.name == "_FOR_ITER_TIER_TWO": dprint = print if self.name == "_EXIT_TRACE": From de8f199a17d2daec020fff97f0661e2a7c0f29ff Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 16:03:13 -0800 Subject: [PATCH 06/14] _SPECIALIZE_UNPACK_SEQUENCE is TIER_ONE_ONLY --- Python/abstract_interp_cases.c.h | 4 ---- Python/bytecodes.c | 1 + Python/executor_cases.c.h | 18 ------------------ Python/generated_cases.c.h | 1 + 4 files changed, 2 insertions(+), 22 deletions(-) diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 28338f53ea7fb9..0d7fbe8a39a5d4 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -242,10 +242,6 @@ break; } - case _SPECIALIZE_UNPACK_SEQUENCE: { - break; - } - case _UNPACK_SEQUENCE: { STACK_SHRINK(1); STACK_GROW(oparg); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index da58cb75e607ed..64e511c7106f2a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1210,6 +1210,7 @@ dummy_func( }; specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) { + TIER_ONE_ONLY #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { next_instr = this_instr; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4f2f73ee76d5ef..0652d7a3ab8b3f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -866,24 +866,6 @@ break; } - case _SPECIALIZE_UNPACK_SEQUENCE: { - PyObject *seq; - seq = stack_pointer[-1]; - uint16_t counter = (uint16_t)operand; - #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { - next_instr = this_instr; - _Py_Specialize_UnpackSequence(seq, next_instr, oparg); - DISPATCH_SAME_OPARG(); - } - STAT_INC(UNPACK_SEQUENCE, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); - #endif /* ENABLE_SPECIALIZATION */ - (void)seq; - (void)counter; - break; - } - case _UNPACK_SEQUENCE: { PyObject *seq; seq = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fe0cbfe6330e51..a74529d88557be 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1731,6 +1731,7 @@ seq = stack_pointer[-1]; { uint16_t counter = read_u16(&this_instr[1].cache); + TIER_ONE_ONLY #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { next_instr = this_instr; From 5c5d8bd5d71e285a950cd40f6c0cb0c3cc575fef Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 15 Nov 2023 16:14:17 -0800 Subject: [PATCH 07/14] NEWS --- .../2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst new file mode 100644 index 00000000000000..b2a34ac735cdeb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-15-16-14-10.gh-issue-106529.Y48ax9.rst @@ -0,0 +1 @@ +Enable translating unspecialized ``FOR_ITER`` to Tier 2. From 36e9ada4d03af2207babd57bfbfd490ed0f9b5fb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 16 Nov 2023 12:34:16 -0800 Subject: [PATCH 08/14] Double max trace length to 256 --- Include/internal/pycore_uops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 0ecbd2dfd1af73..8ab9aaf4108079 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_frame.h" // _PyInterpreterFrame -#define _Py_UOP_MAX_TRACE_LENGTH 128 +#define _Py_UOP_MAX_TRACE_LENGTH 256 typedef struct { uint16_t opcode; From def1830fad23042b498b21263305a7051790991d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 16 Nov 2023 15:24:02 -0800 Subject: [PATCH 09/14] Move stuff around to suit the JIT branch --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 6 ++---- Python/ceval.c | 3 +-- Python/ceval_macros.h | 2 ++ Python/executor_cases.c.h | 6 ++---- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1442350411c90a..d6e348ae951c55 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1784,7 +1784,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, + [_EXIT_TRACE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_VALIDITY] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 64e511c7106f2a..20afb1f9197d8c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2550,13 +2550,10 @@ dummy_func( Py_DECREF(iter); STACK_SHRINK(1); /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - _PyFrame_SetStackPointer(frame, stack_pointer); frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; assert(frame->instr_ptr[-1].op.code == END_FOR || frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - Py_DECREF(current_executor); - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - goto enter_tier_one; + goto exit_trace; } // Common case: no jump, leave it to the code generator } @@ -4034,6 +4031,7 @@ dummy_func( op(_EXIT_TRACE, (--)) { TIER_TWO_ONLY + frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); } diff --git a/Python/ceval.c b/Python/ceval.c index d684c72cc9e302..f1add9f8cf17a9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1067,7 +1067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int UOP_STAT_INC(opcode, miss); frame->return_offset = 0; // Dispatch to frame->instr_ptr _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); // Fall through // Jump here from ENTER_EXECUTOR @@ -1078,7 +1078,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); - frame->instr_ptr = next_uop[-1].target + _PyCode_CODE((PyCodeObject *)frame->f_executable); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 546adbe5f438d1..cd6edeb0734a11 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -397,3 +397,5 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #define GOTO_TIER_TWO() goto enter_tier_two; #define GOTO_TIER_ONE() goto exit_trace; + +#define CURRENT_TARGET() (next_uop[-1].target) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0652d7a3ab8b3f..ccc72d3427e7ac 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2118,13 +2118,10 @@ Py_DECREF(iter); STACK_SHRINK(1); /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - _PyFrame_SetStackPointer(frame, stack_pointer); frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; assert(frame->instr_ptr[-1].op.code == END_FOR || frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - Py_DECREF(current_executor); - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - goto enter_tier_one; + goto exit_trace; } // Common case: no jump, leave it to the code generator STACK_GROW(1); @@ -3276,6 +3273,7 @@ case _EXIT_TRACE: { TIER_TWO_ONLY + frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); break; } From 70968183bf7dbc5c487d9a20d426ba997de0fc34 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 12:47:24 -0800 Subject: [PATCH 10/14] Clean up _FOR_ITER_TIER_TWO using DEOPT_IF(true) --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 7 ++----- Python/executor_cases.c.h | 7 ++----- Python/optimizer.c | 6 ++++++ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d6e348ae951c55..1a2c4956849011 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1681,7 +1681,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_SPECIALIZE_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_FOR_ITER] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [_FOR_ITER_TIER_TWO] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [_ITER_CHECK_LIST] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 20afb1f9197d8c..abc51d82e9409f 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2549,11 +2549,8 @@ dummy_func( /* iterator ended normally */ Py_DECREF(iter); STACK_SHRINK(1); - /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - assert(frame->instr_ptr[-1].op.code == END_FOR || - frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - goto exit_trace; + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true); } // Common case: no jump, leave it to the code generator } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ccc72d3427e7ac..8d13c524794008 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2117,11 +2117,8 @@ /* iterator ended normally */ Py_DECREF(iter); STACK_SHRINK(1); - /* HACK: Emulate DEOPT_IF to jump over END_FOR */ - frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - assert(frame->instr_ptr[-1].op.code == END_FOR || - frame->instr_ptr[-1].op.code == INSTRUMENTED_END_FOR); - goto exit_trace; + /* The translator sets the deopt target just past END_FOR */ + DEOPT_IF(true, _FOR_ITER_TIER_TWO); } // Common case: no jump, leave it to the code generator STACK_GROW(1); diff --git a/Python/optimizer.c b/Python/optimizer.c index 5c9f965aeefa85..2df1d266e67f95 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -621,6 +621,12 @@ translate_bytecode_to_trace( } if (_PyUop_Replacements[uop]) { uop = _PyUop_Replacements[uop]; + if (uop == _FOR_ITER_TIER_TWO) { + target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; + _Py_CODEUNIT word = _PyCode_CODE(code)[target-1]; + assert(word.op.code == END_FOR || + word.op.code == INSTRUMENTED_END_FOR); + } } break; case OPARG_CACHE_1: From 58521059780ff091c118f3b89789831fcf822937 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 13:16:47 -0800 Subject: [PATCH 11/14] Add test --- Lib/test/test_capi/test_misc.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index fe5c36c0c0dec9..21a5cd3326d707 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2808,6 +2808,36 @@ def testfunc(n): uops = {opname for opname, _, _ in ex} self.assertIn("_GUARD_IS_FALSE_POP", uops) + def test_for_iter_tier_two(self): + class MyIter: + def __init__(self, n): + self.n = n + def __iter__(self): + return self + def __next__(self): + self.n -= 1 + if self.n < 0: + raise StopIteration + return self.n + + def testfunc(n, m): + x = 0 + for i in range(m): + for j in MyIter(n): + x += 1000*i + j + return x + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + x = testfunc(10, 10) + + self.assertEqual(x, sum(range(10)) * 10010) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_FOR_ITER_TIER_TWO", uops) + if __name__ == "__main__": unittest.main() From 4ac68b3dfee0b632eaa2bfb62e3667799c095b6f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:06:17 -0800 Subject: [PATCH 12/14] Revert debug change to is_viable_uop() --- Tools/cases_generator/instructions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 457221a0e15f75..9039ac5c6f127e 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -115,7 +115,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if self.name == "_FOR_ITER_TIER_TWO": + if "FRAME" in self.name: dprint = print if self.name == "_EXIT_TRACE": From 95b1a01ea6f2b39b7bc96d2634b74f27cb153e00 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:07:03 -0800 Subject: [PATCH 13/14] Avoid debug-only local variable 'word' --- Python/optimizer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 2df1d266e67f95..4278da3598e4f0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -623,9 +623,8 @@ translate_bytecode_to_trace( uop = _PyUop_Replacements[uop]; if (uop == _FOR_ITER_TIER_TWO) { target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; - _Py_CODEUNIT word = _PyCode_CODE(code)[target-1]; - assert(word.op.code == END_FOR || - word.op.code == INSTRUMENTED_END_FOR); + assert(_PyCode_CODE(code)[target-1].op.code == END_FOR || + _PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR); } } break; From 4c720287aa9910c89a0e7ea4b3ae76224c0f9892 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 17 Nov 2023 14:12:38 -0800 Subject: [PATCH 14/14] Revert changes to _EXIT_TRACE logic --- Include/internal/pycore_opcode_metadata.h | 2 +- Python/bytecodes.c | 1 - Python/ceval.c | 1 + Python/ceval_macros.h | 2 -- Python/executor_cases.c.h | 1 - 5 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 1a2c4956849011..4e45725d393479 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1784,7 +1784,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = { [_JUMP_TO_TOP] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG }, [_SET_IP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [_SAVE_RETURN_OFFSET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [_EXIT_TRACE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [_EXIT_TRACE] = { true, INSTR_FMT_IX, 0 }, [_INSERT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [_CHECK_VALIDITY] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index abc51d82e9409f..06baa85589b168 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4028,7 +4028,6 @@ dummy_func( op(_EXIT_TRACE, (--)) { TIER_TWO_ONLY - frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); } diff --git a/Python/ceval.c b/Python/ceval.c index f1add9f8cf17a9..390de32f8e1b15 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1078,6 +1078,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int // Jump here from _EXIT_TRACE exit_trace: _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); Py_DECREF(current_executor); OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); goto enter_tier_one; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index cd6edeb0734a11..546adbe5f438d1 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -397,5 +397,3 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #define GOTO_TIER_TWO() goto enter_tier_two; #define GOTO_TIER_ONE() goto exit_trace; - -#define CURRENT_TARGET() (next_uop[-1].target) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8d13c524794008..ae662b20e4403f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3270,7 +3270,6 @@ case _EXIT_TRACE: { TIER_TWO_ONLY - frame->instr_ptr = CURRENT_TARGET() + _PyCode_CODE(_PyFrame_GetCode(frame)); GOTO_TIER_ONE(); break; } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy