From 56133bb0184e5d2b8bf5809b648aa512693a0a02 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Sat, 5 Aug 2023 12:13:46 -0700 Subject: [PATCH 01/19] Split `CALL_PY_EXACT_ARGS` into uops This is only the first step for doing `CALL` in Tier 2. The next step involves tracing into the called code object. After that we'll have to do the remaining `CALL` specialization. Finally we'll have to tweak various things like `KW_NAMES`, and possibly move the `NULL` (for method calls) *above* the callable. But those are things for future PRs. Note: this moves setting `frame->return_offset` directly in front of `DISPATCH_INLINED()`, to make it easier to move it into `_PUSH_FRAME`. --- Include/internal/pycore_opcode_metadata.h | 15 +++- Python/bytecodes.c | 35 +++++++--- Python/executor.c | 13 ++++ Python/executor_cases.c.h | 61 ++++++++++++++++ Python/generated_cases.c.h | 85 ++++++++++++++--------- Python/optimizer.c | 9 +++ Tools/cases_generator/generate_cases.py | 16 ++++- Tools/cases_generator/instructions.py | 20 +++--- Tools/cases_generator/stacking.py | 26 +++++-- 9 files changed, 220 insertions(+), 60 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 9f4437c09e92cb..3957c645c45336 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -52,9 +52,12 @@ #define _ITER_CHECK_RANGE 328 #define _IS_ITER_EXHAUSTED_RANGE 329 #define _ITER_NEXT_RANGE 330 -#define _POP_JUMP_IF_FALSE 331 -#define _POP_JUMP_IF_TRUE 332 -#define JUMP_TO_TOP 333 +#define _CHECK_CALL_PY_EXACT_ARGS 331 +#define _INIT_CALL_PY_EXACT_ARGS 332 +#define _PUSH_FRAME 333 +#define _POP_JUMP_IF_FALSE 334 +#define _POP_JUMP_IF_TRUE 335 +#define JUMP_TO_TOP 336 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -951,6 +954,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, + 
INSTR_FMT_IBC0, INSTR_FMT_IBC00, INSTR_FMT_IBC000, INSTR_FMT_IBC00000000, @@ -995,6 +999,7 @@ struct opcode_macro_expansion { #define OPARG_CACHE_4 4 #define OPARG_TOP 5 #define OPARG_BOTTOM 6 +#define OPARG_SAVE_IP 7 #define OPCODE_METADATA_FMT(OP) (_PyOpcode_opcode_metadata[(OP)].instr_format) #define SAME_OPCODE_METADATA(OP1, OP2) \ @@ -1336,6 +1341,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, [WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } }, [PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } }, + [CALL_PY_EXACT_ARGS] = { .nuops = 4, .uops = { { _CHECK_CALL_PY_EXACT_ARGS, 2, 1 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, [CALL_NO_KW_TYPE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TYPE_1, 0, 0 } } }, [CALL_NO_KW_STR_1] = { .nuops = 1, .uops = { { CALL_NO_KW_STR_1, 0, 0 } } }, [CALL_NO_KW_TUPLE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TUPLE_1, 0, 0 } } }, @@ -1389,6 +1395,9 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE", [_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", + [_CHECK_CALL_PY_EXACT_ARGS] = "_CHECK_CALL_PY_EXACT_ARGS", + [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", + [_PUSH_FRAME] = "_PUSH_FRAME", [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", [JUMP_TO_TOP] = "JUMP_TO_TOP", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b2281abc6663da..7e6d1167b76c08 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -957,13 +957,13 @@ dummy_func( { PyGenObject *gen = (PyGenObject *)receiver; _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->return_offset = oparg; STACK_SHRINK(1); _PyFrame_StackPush(gen_frame, v); gen->gi_frame_state = FRAME_EXECUTING; 
gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; SKIP_OVER(INLINE_CACHE_ENTRIES_SEND); + frame->return_offset = oparg; DISPATCH_INLINED(gen_frame); } if (Py_IsNone(v) && PyIter_Check(receiver)) { @@ -996,13 +996,13 @@ dummy_func( DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND); STAT_INC(SEND, hit); _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->return_offset = oparg; STACK_SHRINK(1); _PyFrame_StackPush(gen_frame, v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; SKIP_OVER(INLINE_CACHE_ENTRIES_SEND); + frame->return_offset = oparg; DISPATCH_INLINED(gen_frame); } @@ -2586,7 +2586,6 @@ dummy_func( DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); STAT_INC(FOR_ITER, hit); _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->return_offset = oparg; _PyFrame_StackPush(gen_frame, Py_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; @@ -2594,6 +2593,7 @@ dummy_func( SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); assert(next_instr[oparg].op.code == END_FOR || next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + frame->return_offset = oparg; DISPATCH_INLINED(gen_frame); } @@ -2944,7 +2944,7 @@ dummy_func( GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); } - inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { + op(_CHECK_CALL_PY_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- method, callable, unused[oparg])) { ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); int argcount = oparg; @@ -2958,19 +2958,36 @@ dummy_func( PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(code->co_argcount != argcount, CALL); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); + } + + op(_INIT_CALL_PY_EXACT_ARGS, (method, callable, 
args[oparg] -- new_frame: _PyInterpreterFrame*)) { + int is_meth = method != NULL; + int argcount = oparg; + if (is_meth) { + callable = method; + args--; + argcount++; + } STAT_INC(CALL, hit); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); for (int i = 0; i < argcount; i++) { new_frame->localsplus[i] = args[i]; } - // Manipulate stack directly since we leave using DISPATCH_INLINED(). - STACK_SHRINK(oparg + 2); - SKIP_OVER(INLINE_CACHE_ENTRIES_CALL); + } + + op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) { frame->return_offset = 0; DISPATCH_INLINED(new_frame); } - inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { + macro(CALL_PY_EXACT_ARGS) = + unused/1 + // Skip over the counter + _CHECK_CALL_PY_EXACT_ARGS + + _INIT_CALL_PY_EXACT_ARGS + + _PUSH_FRAME; + + inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, method, callable, args[oparg] -- unused)) { ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); int argcount = oparg; diff --git a/Python/executor.c b/Python/executor.c index 4a18618c0c6c0c..cd673a7beeef88 100644 --- a/Python/executor.c +++ b/Python/executor.c @@ -30,6 +30,19 @@ #undef ENABLE_SPECIALIZATION #define ENABLE_SPECIALIZATION 0 +#undef DISPATCH_INLINED +#define DISPATCH_INLINED(NEW_FRAME) \ + do { \ + assert(tstate->interp->eval_frame == NULL); \ + _PyFrame_SetStackPointer(frame, stack_pointer); \ + frame->prev_instr -= 1; \ + (NEW_FRAME)->previous = frame; \ + frame = tstate->cframe->current_frame = (NEW_FRAME); \ + CALL_STAT_INC(inlined_py_calls); \ + stack_pointer = _PyFrame_GetStackPointer(frame); \ + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; \ + } while (0) + _PyInterpreterFrame * _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer) 
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d6d541a3b61ab4..c7fe281895d945 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2153,6 +2153,67 @@ break; } + case _CHECK_CALL_PY_EXACT_ARGS: { + PyObject *self_or_null; + PyObject *callable; + PyObject *method; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)operand; + ASSERT_KWNAMES_IS_NULL(); + DEOPT_IF(tstate->interp->eval_frame, CALL); + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + DEOPT_IF(!PyFunction_Check(callable), CALL); + PyFunctionObject *func = (PyFunctionObject *)callable; + DEOPT_IF(func->func_version != func_version, CALL); + PyCodeObject *code = (PyCodeObject *)func->func_code; + DEOPT_IF(code->co_argcount != argcount, CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); + stack_pointer[-2 - oparg] = method; + stack_pointer[-1 - oparg] = callable; + break; + } + + case _INIT_CALL_PY_EXACT_ARGS: { + PyObject **args; + PyObject *callable; + PyObject *method; + _PyInterpreterFrame *new_frame; + args = stack_pointer - oparg; + callable = stack_pointer[-1 - oparg]; + method = stack_pointer[-2 - oparg]; + int is_meth = method != NULL; + int argcount = oparg; + if (is_meth) { + callable = method; + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + STACK_SHRINK(oparg); + STACK_SHRINK(1); + stack_pointer[-1] = (PyObject *)new_frame; + break; + } + + case _PUSH_FRAME: { + _PyInterpreterFrame *new_frame; + new_frame = (_PyInterpreterFrame *)stack_pointer[-1]; + STACK_SHRINK(1); + frame->return_offset = 0; + DISPATCH_INLINED(new_frame); + break; + } + case CALL_NO_KW_TYPE_1: { PyObject **args; PyObject *null; diff --git 
a/Python/generated_cases.c.h b/Python/generated_cases.c.h index cf20b869b8182f..2137efeb507e82 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1191,13 +1191,13 @@ { PyGenObject *gen = (PyGenObject *)receiver; _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->return_offset = oparg; STACK_SHRINK(1); _PyFrame_StackPush(gen_frame, v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; SKIP_OVER(INLINE_CACHE_ENTRIES_SEND); + frame->return_offset = oparg; DISPATCH_INLINED(gen_frame); } if (Py_IsNone(v) && PyIter_Check(receiver)) { @@ -1237,13 +1237,13 @@ DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, SEND); STAT_INC(SEND, hit); _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->return_offset = oparg; STACK_SHRINK(1); _PyFrame_StackPush(gen_frame, v); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; SKIP_OVER(INLINE_CACHE_ENTRIES_SEND); + frame->return_offset = oparg; DISPATCH_INLINED(gen_frame); } @@ -3341,7 +3341,6 @@ DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); STAT_INC(FOR_ITER, hit); _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->return_offset = oparg; _PyFrame_StackPush(gen_frame, Py_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; @@ -3349,6 +3348,7 @@ SKIP_OVER(INLINE_CACHE_ENTRIES_FOR_ITER); assert(next_instr[oparg].op.code == END_FOR || next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + frame->return_offset = oparg; DISPATCH_INLINED(gen_frame); STACK_GROW(1); } @@ -3758,47 +3758,68 @@ TARGET(CALL_PY_EXACT_ARGS) { PREDICTED(CALL_PY_EXACT_ARGS); - PyObject **args; PyObject *self_or_null; PyObject *callable; - args = stack_pointer - oparg; + PyObject *method; + PyObject **args; + _PyInterpreterFrame *new_frame; + // 
_CHECK_CALL_PY_EXACT_ARGS self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - uint32_t func_version = read_u32(&next_instr[1].cache); - ASSERT_KWNAMES_IS_NULL(); - DEOPT_IF(tstate->interp->eval_frame, CALL); - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } - DEOPT_IF(!PyFunction_Check(callable), CALL); - PyFunctionObject *func = (PyFunctionObject *)callable; - DEOPT_IF(func->func_version != func_version, CALL); - PyCodeObject *code = (PyCodeObject *)func->func_code; - DEOPT_IF(code->co_argcount != argcount, CALL); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); - STAT_INC(CALL, hit); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); - for (int i = 0; i < argcount; i++) { - new_frame->localsplus[i] = args[i]; + { + uint32_t func_version = read_u32(&next_instr[1].cache); + ASSERT_KWNAMES_IS_NULL(); + DEOPT_IF(tstate->interp->eval_frame, CALL); + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + DEOPT_IF(!PyFunction_Check(callable), CALL); + PyFunctionObject *func = (PyFunctionObject *)callable; + DEOPT_IF(func->func_version != func_version, CALL); + PyCodeObject *code = (PyCodeObject *)func->func_code; + DEOPT_IF(code->co_argcount != argcount, CALL); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); + } + stack_pointer[-2 - oparg] = method; + stack_pointer[-1 - oparg] = callable; + // _INIT_CALL_PY_EXACT_ARGS + args = stack_pointer - oparg; + callable = stack_pointer[-1 - oparg]; + method = stack_pointer[-2 - oparg]; + { + int is_meth = method != NULL; + int argcount = oparg; + if (is_meth) { + callable = method; + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } } - // Manipulate stack 
directly since we leave using DISPATCH_INLINED(). - STACK_SHRINK(oparg + 2); - SKIP_OVER(INLINE_CACHE_ENTRIES_CALL); - frame->return_offset = 0; - DISPATCH_INLINED(new_frame); + // _PUSH_FRAME STACK_SHRINK(oparg); - STACK_SHRINK(1); + STACK_SHRINK(2); + next_instr += 3; + { + frame->return_offset = 0; + DISPATCH_INLINED(new_frame); + } } TARGET(CALL_PY_WITH_DEFAULTS) { PyObject **args; - PyObject *self_or_null; PyObject *callable; + PyObject *method; args = stack_pointer - oparg; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; + callable = stack_pointer[-1 - oparg]; + method = stack_pointer[-2 - oparg]; uint32_t func_version = read_u32(&next_instr[1].cache); ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); diff --git a/Python/optimizer.c b/Python/optimizer.c index 6c730aa14b9a47..dbd5142b9101e9 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -602,6 +602,10 @@ translate_bytecode_to_trace( case OPARG_BOTTOM: // Second half of super-instr oparg = orig_oparg & 0xF; break; + case OPARG_SAVE_IP: // op==SAVE_IP; oparg=next instr + oparg = INSTR_IP(instr + offset, code); + break; + default: fprintf(stderr, "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n", @@ -611,6 +615,11 @@ translate_bytecode_to_trace( Py_FatalError("garbled expansion"); } ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand); + if (expansion->uops[i].uop == _PUSH_FRAME) { + assert(i + 1 == nuops); + ADD_TO_TRACE(SAVE_IP, 0, 0); + goto done; + } } break; } diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d35a16a80e8d00..f183845a667003 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -53,6 +53,7 @@ "OPARG_CACHE_4": 4, "OPARG_TOP": 5, "OPARG_BOTTOM": 6, + "OPARG_SAVE_IP": 7, } INSTR_FMT_PREFIX = "INSTR_FMT_" @@ -344,7 +345,7 @@ def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> No if instr.kind == 
"inst" and instr.is_viable_uop(): # Construct a dummy Component -- input/output mappings are not used part = Component(instr, instr.active_caches) - self.write_macro_expansions(instr.name, [part]) + self.write_macro_expansions(instr.name, [part], instr.cache_offset) elif instr.kind == "inst" and variable_used( instr.inst, "oparg1" ): @@ -354,7 +355,7 @@ def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> No self.write_super_expansions(instr.name) case parsing.Macro(): mac = self.macro_instrs[thing.name] - self.write_macro_expansions(mac.name, mac.parts) + self.write_macro_expansions(mac.name, mac.parts, mac.cache_offset) case parsing.Pseudo(): pass case _: @@ -428,13 +429,22 @@ def add(name: str) -> None: if instr.kind == "op" and instr.is_viable_uop(): add(instr.name) - def write_macro_expansions(self, name: str, parts: MacroParts) -> None: + def write_macro_expansions( + self, name: str, parts: MacroParts, cache_offset: int + ) -> None: """Write the macro expansions for a macro-instruction.""" # TODO: Refactor to share code with write_cody(), is_viaible_uop(), etc. offset = 0 # Cache effect offset expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...] 
for part in parts: if isinstance(part, Component): + # _PUSH_FRAME is super special; it expands to SAVE_IP(next_instr) + _PUSH_FRAME + if part.instr.name == "_PUSH_FRAME": + expansions.append( + ("SAVE_IP", OPARG_SIZES["OPARG_SAVE_IP"], cache_offset) + ) + expansions.append(("_PUSH_FRAME", OPARG_SIZES["OPARG_FULL"], 0)) + continue # All component instructions must be viable uops if not part.instr.is_viable_uop(): # This note just reminds us about macros that cannot diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index aa94dbb07ea1c0..4219fa9c654798 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -59,7 +59,7 @@ class Instruction: block_line: int # First line of block in original code # Computed by constructor - always_exits: bool + always_exits: str # If the block always exits, its last line; else "" has_deopt: bool cache_offset: int cache_effects: list[parsing.CacheEffect] @@ -120,11 +120,13 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - # if self.name.startswith("CALL"): - # dprint = print + if "PY_EXACT" in self.name: + dprint = print if self.name == "EXIT_TRACE": return True # This has 'return frame' but it's okay + if self.name == "_PUSH_FRAME": + return True # Has DISPATCH_INLINED but it's okay if self.always_exits: dprint(f"Skipping {self.name} because it always exits") return False @@ -322,16 +324,16 @@ def extract_block_text(block: parsing.Block) -> tuple[list[str], bool, int]: return blocklines, check_eval_breaker, block_line -def always_exits(lines: list[str]) -> bool: +def always_exits(lines: list[str]) -> str: """Determine whether a block always ends in a return/goto/etc.""" if not lines: - return False + return "" line = lines[-1].rstrip() # Indent must match exactly (TODO: Do something better) if line[:12] != " 
" * 12: - return False + return "" line = line[12:] - return line.startswith( + if line.startswith( ( "goto ", "return ", @@ -340,4 +342,6 @@ def always_exits(lines: list[str]) -> bool: "Py_UNREACHABLE()", "ERROR_IF(true, ", ) - ) + ): + return line + return "" diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index d457ce01a8f438..aea6d70e1fcba9 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -295,6 +295,7 @@ def write_single_instr( [Component(instr, instr.active_caches)], out, tier, + 0, ) except AssertionError as err: raise AssertionError(f"Error writing instruction {instr.name}") from err @@ -306,12 +307,14 @@ def write_macro_instr( parts = [part for part in mac.parts if isinstance(part, Component)] cache_adjust = 0 + always_exits = False for part in mac.parts: match part: case CacheEffect(size=size): cache_adjust += size case Component(instr=instr): cache_adjust += instr.cache_offset + always_exits = instr.always_exits case _: typing.assert_never(part) @@ -321,18 +324,20 @@ def write_macro_instr( out.emit(f"PREDICTED({mac.name});") out.static_assert_family_size(mac.name, family, cache_adjust) try: - write_components(parts, out, TIER_ONE) + write_components(parts, out, TIER_ONE, cache_adjust) except AssertionError as err: raise AssertionError(f"Error writing macro {mac.name}") from err - if cache_adjust: - out.emit(f"next_instr += {cache_adjust};") - out.emit("DISPATCH();") + if not always_exits: + if cache_adjust: + out.emit(f"next_instr += {cache_adjust};") + out.emit("DISPATCH();") def write_components( parts: list[Component], out: Formatter, tier: Tiers, + cache_adjust: int, ) -> None: managers = get_managers(parts) @@ -374,13 +379,24 @@ def write_components( poke.as_stack_effect(lax=True), ) + dispatch_inlined_special_case = False + if mgr is managers[-1] and mgr.instr.always_exits.startswith("DISPATCH_INLINED") and mgr.instr.name == "_PUSH_FRAME": + dispatch_inlined_special_case = 
True + temp = mgr.final_offset.clone() + temp.deeper(StackEffect(UNUSED)) # Hack + out.stack_adjust(temp.deep, temp.high) + # Use clone() since adjust_inverse() mutates final_offset + mgr.adjust_inverse(mgr.final_offset.clone()) + if cache_adjust: + out.emit(f"next_instr += {cache_adjust};") + if len(parts) == 1: mgr.instr.write_body(out, 0, mgr.active_caches, tier) else: with out.block(""): mgr.instr.write_body(out, -4, mgr.active_caches, tier) - if mgr is managers[-1]: + if mgr is managers[-1] and not dispatch_inlined_special_case: out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high) # Use clone() since adjust_inverse() mutates final_offset mgr.adjust_inverse(mgr.final_offset.clone()) From 907ff95317c8d7b9221ce95c8823ed8d2cf4861c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Aug 2023 12:00:18 -0700 Subject: [PATCH 02/19] Fix merge so it works again (I think) --- Python/bytecodes.c | 17 +++++------------ Python/executor_cases.c.h | 20 +++++--------------- Python/generated_cases.c.h | 24 +++++++----------------- 3 files changed, 17 insertions(+), 44 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7e6d1167b76c08..accd4f798d1b26 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2944,27 +2944,20 @@ dummy_func( GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); } - op(_CHECK_CALL_PY_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- method, callable, unused[oparg])) { + op(_CHECK_CALL_PY_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } DEOPT_IF(!PyFunction_Check(callable), CALL); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; - DEOPT_IF(code->co_argcount != argcount, CALL); + 
DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); } - op(_INIT_CALL_PY_EXACT_ARGS, (method, callable, args[oparg] -- new_frame: _PyInterpreterFrame*)) { - int is_meth = method != NULL; + op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { int argcount = oparg; - if (is_meth) { - callable = method; + if (self_or_null != NULL) { args--; argcount++; } @@ -2987,7 +2980,7 @@ dummy_func( _INIT_CALL_PY_EXACT_ARGS + _PUSH_FRAME; - inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, method, callable, args[oparg] -- unused)) { + inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); int argcount = oparg; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c7fe281895d945..b4d25a4522ada2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2156,40 +2156,30 @@ case _CHECK_CALL_PY_EXACT_ARGS: { PyObject *self_or_null; PyObject *callable; - PyObject *method; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)operand; ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } DEOPT_IF(!PyFunction_Check(callable), CALL); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; - DEOPT_IF(code->co_argcount != argcount, CALL); + DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); - stack_pointer[-2 - oparg] = method; - stack_pointer[-1 - oparg] = callable; break; } case _INIT_CALL_PY_EXACT_ARGS: { PyObject **args; + PyObject 
*self_or_null; PyObject *callable; - PyObject *method; _PyInterpreterFrame *new_frame; args = stack_pointer - oparg; - callable = stack_pointer[-1 - oparg]; - method = stack_pointer[-2 - oparg]; - int is_meth = method != NULL; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; int argcount = oparg; - if (is_meth) { - callable = method; + if (self_or_null != NULL) { args--; argcount++; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2137efeb507e82..21e00e629d22e0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3760,7 +3760,6 @@ PREDICTED(CALL_PY_EXACT_ARGS); PyObject *self_or_null; PyObject *callable; - PyObject *method; PyObject **args; _PyInterpreterFrame *new_frame; // _CHECK_CALL_PY_EXACT_ARGS @@ -3770,29 +3769,20 @@ uint32_t func_version = read_u32(&next_instr[1].cache); ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } DEOPT_IF(!PyFunction_Check(callable), CALL); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; - DEOPT_IF(code->co_argcount != argcount, CALL); + DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); } - stack_pointer[-2 - oparg] = method; - stack_pointer[-1 - oparg] = callable; // _INIT_CALL_PY_EXACT_ARGS args = stack_pointer - oparg; - callable = stack_pointer[-1 - oparg]; - method = stack_pointer[-2 - oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; { - int is_meth = method != NULL; int argcount = oparg; - if (is_meth) { - callable = method; + if (self_or_null != NULL) { args--; argcount++; } @@ -3815,11 +3805,11 @@ TARGET(CALL_PY_WITH_DEFAULTS) { PyObject **args; + PyObject *self_or_null; PyObject *callable; - 
PyObject *method; args = stack_pointer - oparg; - callable = stack_pointer[-1 - oparg]; - method = stack_pointer[-2 - oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; uint32_t func_version = read_u32(&next_instr[1].cache); ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(tstate->interp->eval_frame, CALL); From 2c6be6d65290d85a1c43f4491202dbbbb1e38778 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Aug 2023 12:15:46 -0700 Subject: [PATCH 03/19] Split into finer-grained uops --- Include/internal/pycore_opcode_metadata.h | 20 ++++++++++++-------- Python/bytecodes.c | 16 +++++++++++++--- Python/executor_cases.c.h | 16 ++++++++++++++-- Python/generated_cases.c.h | 13 +++++++++++-- 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 3957c645c45336..3ebbf3fcc8c8a2 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -52,12 +52,14 @@ #define _ITER_CHECK_RANGE 328 #define _IS_ITER_EXHAUSTED_RANGE 329 #define _ITER_NEXT_RANGE 330 -#define _CHECK_CALL_PY_EXACT_ARGS 331 -#define _INIT_CALL_PY_EXACT_ARGS 332 -#define _PUSH_FRAME 333 -#define _POP_JUMP_IF_FALSE 334 -#define _POP_JUMP_IF_TRUE 335 -#define JUMP_TO_TOP 336 +#define _CHECK_PEP_523 331 +#define _CHECK_FUNCTION_EXACT_ARGS 332 +#define _CHECK_STACK_SPACE 333 +#define _INIT_CALL_PY_EXACT_ARGS 334 +#define _PUSH_FRAME 335 +#define _POP_JUMP_IF_FALSE 336 +#define _POP_JUMP_IF_TRUE 337 +#define JUMP_TO_TOP 338 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -1341,7 +1343,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, [WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } }, [PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 
} } }, - [CALL_PY_EXACT_ARGS] = { .nuops = 4, .uops = { { _CHECK_CALL_PY_EXACT_ARGS, 2, 1 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, + [CALL_PY_EXACT_ARGS] = { .nuops = 6, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, [CALL_NO_KW_TYPE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TYPE_1, 0, 0 } } }, [CALL_NO_KW_STR_1] = { .nuops = 1, .uops = { { CALL_NO_KW_STR_1, 0, 0 } } }, [CALL_NO_KW_TUPLE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TUPLE_1, 0, 0 } } }, @@ -1395,7 +1397,9 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE", [_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", - [_CHECK_CALL_PY_EXACT_ARGS] = "_CHECK_CALL_PY_EXACT_ARGS", + [_CHECK_PEP_523] = "_CHECK_PEP_523", + [_CHECK_FUNCTION_EXACT_ARGS] = "_CHECK_FUNCTION_EXACT_ARGS", + [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", [_PUSH_FRAME] = "_PUSH_FRAME", [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index accd4f798d1b26..48e523264d8e62 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2944,14 +2944,22 @@ dummy_func( GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); } - op(_CHECK_CALL_PY_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { - ASSERT_KWNAMES_IS_NULL(); + op(_CHECK_PEP_523, (--)) { DEOPT_IF(tstate->interp->eval_frame, CALL); + } + + op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { + ASSERT_KWNAMES_IS_NULL(); DEOPT_IF(!PyFunction_Check(callable), CALL); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, CALL); PyCodeObject 
*code = (PyCodeObject *)func->func_code; DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); + } + + op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); } @@ -2976,7 +2984,9 @@ dummy_func( macro(CALL_PY_EXACT_ARGS) = unused/1 + // Skip over the counter - _CHECK_CALL_PY_EXACT_ARGS + + _CHECK_PEP_523 + + _CHECK_FUNCTION_EXACT_ARGS + + _CHECK_STACK_SPACE + _INIT_CALL_PY_EXACT_ARGS + _PUSH_FRAME; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b4d25a4522ada2..d76c66ee304df1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2153,19 +2153,31 @@ break; } - case _CHECK_CALL_PY_EXACT_ARGS: { + case _CHECK_PEP_523: { + DEOPT_IF(tstate->interp->eval_frame, CALL); + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS: { PyObject *self_or_null; PyObject *callable; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; uint32_t func_version = (uint32_t)operand; ASSERT_KWNAMES_IS_NULL(); - DEOPT_IF(tstate->interp->eval_frame, CALL); DEOPT_IF(!PyFunction_Check(callable), CALL); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); + break; + } + + case _CHECK_STACK_SPACE: { + PyObject *callable; + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 21e00e629d22e0..f13b9066f937b2 100644 --- a/Python/generated_cases.c.h +++ 
b/Python/generated_cases.c.h @@ -3762,18 +3762,27 @@ PyObject *callable; PyObject **args; _PyInterpreterFrame *new_frame; - // _CHECK_CALL_PY_EXACT_ARGS + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, CALL); + } + // _CHECK_FUNCTION_EXACT_ARGS self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; { uint32_t func_version = read_u32(&next_instr[1].cache); ASSERT_KWNAMES_IS_NULL(); - DEOPT_IF(tstate->interp->eval_frame, CALL); DEOPT_IF(!PyFunction_Check(callable), CALL); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version, CALL); PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); + } + // _CHECK_STACK_SPACE + callable = stack_pointer[-2 - oparg]; + { + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); } // _INIT_CALL_PY_EXACT_ARGS From 6d78ff2926d558de0508be62c8f23c99962202b5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Aug 2023 17:24:45 -0700 Subject: [PATCH 04/19] Fix type error in stacking.py --- Tools/cases_generator/stacking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index aea6d70e1fcba9..86b9ee92c80be5 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -307,7 +307,7 @@ def write_macro_instr( parts = [part for part in mac.parts if isinstance(part, Component)] cache_adjust = 0 - always_exits = False + always_exits = "" for part in mac.parts: match part: case CacheEffect(size=size): From 0d8e66c91da65571bd990a3cf9e092fa68d264fb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Aug 2023 17:44:59 -0700 Subject: [PATCH 05/19] Add test --- Lib/test/test_capi/test_misc.py | 17 +++++++++++++++++ 1 file changed, 17 
insertions(+) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 001d37de8e0eb3..36b8e1fa993b98 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2618,6 +2618,23 @@ def testfunc(it): with self.assertRaises(StopIteration): next(it) + def test_call_py_exact_args(self): + def testfunc(n): + def dummy(x): + return x+1 + for i in range(n): + dummy(i) + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + testfunc(10) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = {opname for opname, _, _ in ex} + self.assertIn("_PUSH_FRAME", uops) + + if __name__ == "__main__": unittest.main() From b75f30eb37c8ac68a014a7a3318e549e9e4d4ea3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 9 Aug 2023 18:21:20 -0700 Subject: [PATCH 06/19] Add comment explaining _PUSH_FRAME's unused output effect --- Python/bytecodes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 48e523264d8e62..e749348bdae488 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2977,6 +2977,9 @@ dummy_func( } } + // The 'unused' output effect represents the return value + // (which will be pushed when the frame returns). + // It is needed so CALL_PY_EXACT_ARGS matches its family. 
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) { frame->return_offset = 0; DISPATCH_INLINED(new_frame); From 61c2822cde6b709855c1052df38ed05cb059b7af Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Aug 2023 07:53:35 -0700 Subject: [PATCH 07/19] Make PUSH_FRAME special case a little less mysterious --- Tools/cases_generator/stacking.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 86b9ee92c80be5..d9eaacfcbeb974 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -382,9 +382,8 @@ def write_components( dispatch_inlined_special_case = False if mgr is managers[-1] and mgr.instr.always_exits.startswith("DISPATCH_INLINED") and mgr.instr.name == "_PUSH_FRAME": dispatch_inlined_special_case = True - temp = mgr.final_offset.clone() - temp.deeper(StackEffect(UNUSED)) # Hack - out.stack_adjust(temp.deep, temp.high) + # Adjust stack to min_offset (input effects materialized) + out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high) # Use clone() since adjust_inverse() mutates final_offset mgr.adjust_inverse(mgr.final_offset.clone()) if cache_adjust: @@ -397,6 +396,7 @@ def write_components( mgr.instr.write_body(out, -4, mgr.active_caches, tier) if mgr is managers[-1] and not dispatch_inlined_special_case: + # TODO: Explain why this adjustment is needed.
out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high) # Use clone() since adjust_inverse() mutates final_offset mgr.adjust_inverse(mgr.final_offset.clone()) From f73ea90f72f9eeef25f6eb9ada1b83a6cbca54e1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Aug 2023 15:46:09 -0700 Subject: [PATCH 08/19] Rename Instruction.write to write_case_body --- Tools/cases_generator/generate_cases.py | 5 +++-- Tools/cases_generator/instructions.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index f183845a667003..3563f16a5978a6 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -22,6 +22,7 @@ PseudoInstruction, StackEffect, OverriddenInstructionPlaceHolder, + TIER_ONE, TIER_TWO, ) import parsing @@ -597,7 +598,7 @@ def write_executor_instructions( n_instrs += 1 self.out.emit("") with self.out.block(f"case {thing.name}:"): - instr.write(self.out, tier=TIER_TWO) + instr.write_case_body(self.out, tier=TIER_TWO) if instr.check_eval_breaker: self.out.emit("CHECK_EVAL_BREAKER();") self.out.emit("break;") @@ -630,7 +631,7 @@ def write_instr(self, instr: Instruction) -> None: with self.out.block(f"TARGET({name})"): if instr.predicted: self.out.emit(f"PREDICTED({name});") - instr.write(self.out) + instr.write_case_body(self.out, tier=TIER_ONE) if not instr.always_exits: if instr.check_eval_breaker: self.out.emit("CHECK_EVAL_BREAKER();") diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 4219fa9c654798..c52f0dcb0ced62 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -142,7 +142,7 @@ def is_viable_uop(self) -> bool: res = False return res - def write(self, out: Formatter, tier: Tiers = TIER_ONE) -> None: + def write_case_body(self, out: Formatter, tier: Tiers) -> None: """Write one instruction, sans prologue and 
epilogue.""" # Write a static assertion that a family's cache size is correct From 12910fcd18be115e99a7575d38c5e7967a97fe45 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Aug 2023 16:01:28 -0700 Subject: [PATCH 09/19] Move next_instr update to a more logical place --- Tools/cases_generator/generate_cases.py | 2 ++ Tools/cases_generator/instructions.py | 8 -------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 3563f16a5978a6..d84ee47bc8a5bf 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -633,6 +633,8 @@ def write_instr(self, instr: Instruction) -> None: self.out.emit(f"PREDICTED({name});") instr.write_case_body(self.out, tier=TIER_ONE) if not instr.always_exits: + if instr.cache_offset: + self.out.emit(f"next_instr += {instr.cache_offset};") if instr.check_eval_breaker: self.out.emit("CHECK_EVAL_BREAKER();") self.out.emit(f"DISPATCH();") diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index c52f0dcb0ced62..7094320a4a3862 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -151,14 +151,6 @@ def write_case_body(self, out: Formatter, tier: Tiers) -> None: # Write input stack effect variable declarations and initializations stacking.write_single_instr(self, out, tier) - # Skip the rest if the block always exits - if self.always_exits: - return - - # Write cache effect - if tier == TIER_ONE and self.cache_offset: - out.emit(f"next_instr += {self.cache_offset};") - def write_body( self, out: Formatter, From 2fafa2c16116af63834792c06ba88d8f2c22043f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Aug 2023 16:19:06 -0700 Subject: [PATCH 10/19] Don't recompute macro cache offset --- Tools/cases_generator/stacking.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff 
--git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index d9eaacfcbeb974..ad63bbbdb0505c 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -305,31 +305,18 @@ def write_macro_instr( mac: MacroInstruction, out: Formatter, family: Family | None ) -> None: parts = [part for part in mac.parts if isinstance(part, Component)] - - cache_adjust = 0 - always_exits = "" - for part in mac.parts: - match part: - case CacheEffect(size=size): - cache_adjust += size - case Component(instr=instr): - cache_adjust += instr.cache_offset - always_exits = instr.always_exits - case _: - typing.assert_never(part) - out.emit("") with out.block(f"TARGET({mac.name})"): if mac.predicted: out.emit(f"PREDICTED({mac.name});") - out.static_assert_family_size(mac.name, family, cache_adjust) + out.static_assert_family_size(mac.name, family, mac.cache_offset) try: - write_components(parts, out, TIER_ONE, cache_adjust) + write_components(parts, out, TIER_ONE, mac.cache_offset) except AssertionError as err: raise AssertionError(f"Error writing macro {mac.name}") from err - if not always_exits: - if cache_adjust: - out.emit(f"next_instr += {cache_adjust};") + if not parts[-1].instr.always_exits: + if mac.cache_offset: + out.emit(f"next_instr += {mac.cache_offset};") out.emit("DISPATCH();") @@ -337,7 +324,7 @@ def write_components( parts: list[Component], out: Formatter, tier: Tiers, - cache_adjust: int, + cache_offset: int, ) -> None: managers = get_managers(parts) @@ -386,8 +373,8 @@ def write_components( out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high) # Use clone() since adjust_inverse() mutates final_offset mgr.adjust_inverse(mgr.final_offset.clone()) - if cache_adjust: - out.emit(f"next_instr += {cache_adjust};") + if cache_offset: + out.emit(f"next_instr += {cache_offset};") if len(parts) == 1: mgr.instr.write_body(out, 0, mgr.active_caches, tier) From 2717b0738b3c25b996288f19579d125c9ffda830 Mon Sep 17 00:00:00 2001 
From: Guido van Rossum Date: Thu, 10 Aug 2023 16:22:58 -0700 Subject: [PATCH 11/19] Fold and refactor long line in stacking.py --- Tools/cases_generator/stacking.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index ad63bbbdb0505c..c4f5ee29ef41af 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -366,9 +366,12 @@ def write_components( poke.as_stack_effect(lax=True), ) - dispatch_inlined_special_case = False - if mgr is managers[-1] and mgr.instr.always_exits.startswith("DISPATCH_INLINED") and mgr.instr.name == "_PUSH_FRAME": - dispatch_inlined_special_case = True + dispatch_inlined_special_case = ( + mgr is managers[-1] + and mgr.instr.always_exits.startswith("DISPATCH_INLINED") + and mgr.instr.name == "_PUSH_FRAME" + ) + if dispatch_inlined_special_case: # Adjust stack to min_offset (input effects materialized) out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high) # Use clone() since adjust_inverse() mutates final_offset From e48790885a448f1065411ad0eda0e69ca4896808 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Aug 2023 16:24:33 -0700 Subject: [PATCH 12/19] Fold long lines in generate_cases.py --- Tools/cases_generator/generate_cases.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index d84ee47bc8a5bf..c0a11bcd23dd7f 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -346,7 +346,9 @@ def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> No if instr.kind == "inst" and instr.is_viable_uop(): # Construct a dummy Component -- input/output mappings are not used part = Component(instr, instr.active_caches) - self.write_macro_expansions(instr.name, [part], instr.cache_offset) + self.write_macro_expansions( + instr.name, 
[part], instr.cache_offset + ) elif instr.kind == "inst" and variable_used( instr.inst, "oparg1" ): @@ -356,7 +358,9 @@ def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> No self.write_super_expansions(instr.name) case parsing.Macro(): mac = self.macro_instrs[thing.name] - self.write_macro_expansions(mac.name, mac.parts, mac.cache_offset) + self.write_macro_expansions( + mac.name, mac.parts, mac.cache_offset + ) case parsing.Pseudo(): pass case _: @@ -562,7 +566,9 @@ def write_instructions( case parsing.Macro(): n_macros += 1 mac = self.macro_instrs[thing.name] - stacking.write_macro_instr(mac, self.out, self.families.get(mac.name)) + stacking.write_macro_instr( + mac, self.out, self.families.get(mac.name) + ) # self.write_macro(self.macro_instrs[thing.name]) case parsing.Pseudo(): pass From 1d549af5f4b0d28f22da4561ba0d1ba16e226823 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Aug 2023 16:52:09 -0700 Subject: [PATCH 13/19] Don't emit static assert to executor cases --- Python/executor_cases.c.h | 9 --------- Tools/cases_generator/generate_cases.py | 1 + Tools/cases_generator/instructions.py | 5 ----- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d76c66ee304df1..f63c5c60fdc085 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -103,7 +103,6 @@ } case TO_BOOL: { - static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size"); PyObject *value; PyObject *res; value = stack_pointer[-1]; @@ -363,7 +362,6 @@ } case BINARY_SUBSCR: { - static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); PyObject *sub; PyObject *container; PyObject *res; @@ -557,7 +555,6 @@ } case STORE_SUBSCR: { - static_assert(INLINE_CACHE_ENTRIES_STORE_SUBSCR == 1, "incorrect cache size"); PyObject *sub; PyObject *container; PyObject *v; @@ -862,7 +859,6 @@ } case UNPACK_SEQUENCE: { - 
static_assert(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE == 1, "incorrect cache size"); PyObject *seq; seq = stack_pointer[-1]; #if ENABLE_SPECIALIZATION @@ -950,7 +946,6 @@ } case STORE_ATTR: { - static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); PyObject *owner; PyObject *v; owner = stack_pointer[-1]; @@ -1061,7 +1056,6 @@ } case LOAD_GLOBAL: { - static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); PyObject *res; PyObject *null = NULL; #if ENABLE_SPECIALIZATION @@ -1554,7 +1548,6 @@ } case LOAD_ATTR: { - static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); PyObject *owner; PyObject *attr; PyObject *self_or_null = NULL; @@ -1648,7 +1641,6 @@ } case COMPARE_OP: { - static_assert(INLINE_CACHE_ENTRIES_COMPARE_OP == 1, "incorrect cache size"); PyObject *right; PyObject *left; PyObject *res; @@ -2717,7 +2709,6 @@ } case BINARY_OP: { - static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size"); PyObject *rhs; PyObject *lhs; PyObject *res; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index c0a11bcd23dd7f..6e80a06da386d1 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -637,6 +637,7 @@ def write_instr(self, instr: Instruction) -> None: with self.out.block(f"TARGET({name})"): if instr.predicted: self.out.emit(f"PREDICTED({name});") + self.out.static_assert_family_size(instr.name, instr.family, instr.cache_offset) instr.write_case_body(self.out, tier=TIER_ONE) if not instr.always_exits: if instr.cache_offset: diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 7094320a4a3862..27cfb2c20ea9e0 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -144,11 +144,6 @@ def is_viable_uop(self) -> bool: def write_case_body(self, out: Formatter, tier: Tiers) -> None: """Write one instruction, sans prologue and 
epilogue.""" - - # Write a static assertion that a family's cache size is correct - out.static_assert_family_size(self.name, self.family, self.cache_offset) - - # Write input stack effect variable declarations and initializations stacking.write_single_instr(self, out, tier) def write_body( From f40fb1f82267bcc39b0ded8b5e1f2f0b731a24f9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 10 Aug 2023 16:55:09 -0700 Subject: [PATCH 14/19] Factor away write_case_body (formerly Instruction.write) --- Tools/cases_generator/generate_cases.py | 4 ++-- Tools/cases_generator/instructions.py | 4 ---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 6e80a06da386d1..32344ffa8c011e 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -604,7 +604,7 @@ def write_executor_instructions( n_instrs += 1 self.out.emit("") with self.out.block(f"case {thing.name}:"): - instr.write_case_body(self.out, tier=TIER_TWO) + stacking.write_single_instr(instr, self.out, tier=TIER_TWO) if instr.check_eval_breaker: self.out.emit("CHECK_EVAL_BREAKER();") self.out.emit("break;") @@ -638,7 +638,7 @@ def write_instr(self, instr: Instruction) -> None: if instr.predicted: self.out.emit(f"PREDICTED({name});") self.out.static_assert_family_size(instr.name, instr.family, instr.cache_offset) - instr.write_case_body(self.out, tier=TIER_ONE) + stacking.write_single_instr(instr, self.out, tier=TIER_ONE) if not instr.always_exits: if instr.cache_offset: self.out.emit(f"next_instr += {instr.cache_offset};") diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 27cfb2c20ea9e0..1fb291eeca1d0c 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -142,10 +142,6 @@ def is_viable_uop(self) -> bool: res = False return res - def write_case_body(self, out: Formatter, tier: Tiers) -> 
None: - """Write one instruction, sans prologue and epilogue.""" - stacking.write_single_instr(self, out, tier) - def write_body( self, out: Formatter, From 4f6f8f8f44761ae3945cc873497bbfe2ec07102b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Aug 2023 14:58:02 -0700 Subject: [PATCH 15/19] Fold long lines --- Tools/cases_generator/generate_cases.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 32344ffa8c011e..2a695c45684cc1 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -604,7 +604,9 @@ def write_executor_instructions( n_instrs += 1 self.out.emit("") with self.out.block(f"case {thing.name}:"): - stacking.write_single_instr(instr, self.out, tier=TIER_TWO) + stacking.write_single_instr( + instr, self.out, tier=TIER_TWO + ) if instr.check_eval_breaker: self.out.emit("CHECK_EVAL_BREAKER();") self.out.emit("break;") @@ -637,7 +639,9 @@ def write_instr(self, instr: Instruction) -> None: with self.out.block(f"TARGET({name})"): if instr.predicted: self.out.emit(f"PREDICTED({name});") - self.out.static_assert_family_size(instr.name, instr.family, instr.cache_offset) + self.out.static_assert_family_size( + instr.name, instr.family, instr.cache_offset + ) stacking.write_single_instr(instr, self.out, tier=TIER_ONE) if not instr.always_exits: if instr.cache_offset: From 6facc8dd838998999c117ddf8c225ad09bebfe47 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Aug 2023 14:19:21 -0700 Subject: [PATCH 16/19] Make less of a special case of _PUSH_FRAME Instead, the special case is an opcode using SAVE_FRAME_STATE(). Introducing #if TIER_ONE and #if TIER_TWO so we can implement _PUSH_FRAME differently for both tiers. 
--- Python/bytecodes.c | 16 +++++++++++++++- Python/ceval.c | 1 + Python/ceval_macros.h | 9 +++++++-- Python/executor.c | 18 +++++++----------- Python/executor_cases.c.h | 16 +++++++++++++++- Python/generated_cases.c.h | 16 +++++++++++++++- Tools/cases_generator/instructions.py | 8 ++++---- Tools/cases_generator/stacking.py | 12 ++++-------- 8 files changed, 68 insertions(+), 28 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e749348bdae488..9919c4141c9099 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2981,8 +2981,22 @@ dummy_func( // (which will be pushed when the frame returns). // It is needed so CALL_PY_EXACT_ARGS matches its family. op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. frame->return_offset = 0; - DISPATCH_INLINED(new_frame); + assert(tstate->interp->eval_frame == NULL); + SAVE_FRAME_STATE(); // Signals to the code generator + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + #if TIER_ONE + frame = cframe.current_frame = new_frame; + goto start_frame; + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = new_frame; + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif } macro(CALL_PY_EXACT_ARGS) = diff --git a/Python/ceval.c b/Python/ceval.c index b966399a342d08..2370636d765d9c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -770,6 +770,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int #endif { +#define TIER_ONE 1 #include "generated_cases.c.h" /* INSTRUMENTED_LINE has to be here, rather than in bytecodes.c, diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 8dc8b754485856..77f760f0bb5995 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -103,11 +103,16 @@ DISPATCH_GOTO(); \ } +#define 
SAVE_FRAME_STATE() \ + do { \ + frame->prev_instr = next_instr - 1; \ + _PyFrame_SetStackPointer(frame, stack_pointer); \ + } while (0) + #define DISPATCH_INLINED(NEW_FRAME) \ do { \ assert(tstate->interp->eval_frame == NULL); \ - _PyFrame_SetStackPointer(frame, stack_pointer); \ - frame->prev_instr = next_instr - 1; \ + SAVE_FRAME_STATE(); \ (NEW_FRAME)->previous = frame; \ frame = cframe.current_frame = (NEW_FRAME); \ CALL_STAT_INC(inlined_py_calls); \ diff --git a/Python/executor.c b/Python/executor.c index cd673a7beeef88..c18ba6d4ade3e5 100644 --- a/Python/executor.c +++ b/Python/executor.c @@ -30,17 +30,12 @@ #undef ENABLE_SPECIALIZATION #define ENABLE_SPECIALIZATION 0 -#undef DISPATCH_INLINED -#define DISPATCH_INLINED(NEW_FRAME) \ - do { \ - assert(tstate->interp->eval_frame == NULL); \ - _PyFrame_SetStackPointer(frame, stack_pointer); \ - frame->prev_instr -= 1; \ - (NEW_FRAME)->previous = frame; \ - frame = tstate->cframe->current_frame = (NEW_FRAME); \ - CALL_STAT_INC(inlined_py_calls); \ - stack_pointer = _PyFrame_GetStackPointer(frame); \ - ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; \ +#undef SAVE_FRAME_STATE +#define SAVE_FRAME_STATE() \ + do { \ + /* Assume preceding SAVE_IP has set frame->prev_instr */ \ + frame->prev_instr--; \ + _PyFrame_SetStackPointer(frame, stack_pointer); \ } while (0) @@ -94,6 +89,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject OBJECT_STAT_INC(optimization_uops_executed); switch (opcode) { +#define TIER_TWO 2 #include "executor_cases.c.h" default: diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index f63c5c60fdc085..4ace06857eb424 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2203,8 +2203,22 @@ _PyInterpreterFrame *new_frame; new_frame = (_PyInterpreterFrame *)stack_pointer[-1]; STACK_SHRINK(1); + // Write it out explicitly because it's subtly different. 
+ // Eventually this should be the only occurrence of this code. frame->return_offset = 0; - DISPATCH_INLINED(new_frame); + assert(tstate->interp->eval_frame == NULL); + SAVE_FRAME_STATE(); // Signals to the code generator + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + #if TIER_ONE + frame = cframe.current_frame = new_frame; + goto start_frame; + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = new_frame; + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f13b9066f937b2..a42457d05faf03 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3807,8 +3807,22 @@ STACK_SHRINK(2); next_instr += 3; { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. frame->return_offset = 0; - DISPATCH_INLINED(new_frame); + assert(tstate->interp->eval_frame == NULL); + SAVE_FRAME_STATE(); // Signals to the code generator + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + #if TIER_ONE + frame = cframe.current_frame = new_frame; + goto start_frame; + #endif + #if TIER_TWO + frame = tstate->cframe->current_frame = new_frame; + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif } } diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 1fb291eeca1d0c..6c52cf3e8a3d09 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -60,6 +60,7 @@ class Instruction: # Computed by constructor always_exits: str # If the block always exits, its last line; else "" + save_frame_state: bool # Whether the instruction uses SAVE_FRAME_STATE() has_deopt: bool cache_offset: int cache_effects: list[parsing.CacheEffect] @@ -83,6 +84,7 @@ def 
__init__(self, inst: parsing.InstDef): self.block ) self.always_exits = always_exits(self.block_text) + self.save_frame_state = variable_used(self.inst, "SAVE_FRAME_STATE") self.has_deopt = variable_used(self.inst, "DEOPT_IF") self.cache_effects = [ effect for effect in inst.inputs if isinstance(effect, parsing.CacheEffect) @@ -120,15 +122,13 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if "PY_EXACT" in self.name: + if "PUSH_FRAME" in self.name: dprint = print if self.name == "EXIT_TRACE": return True # This has 'return frame' but it's okay - if self.name == "_PUSH_FRAME": - return True # Has DISPATCH_INLINED but it's okay if self.always_exits: - dprint(f"Skipping {self.name} because it always exits") + dprint(f"Skipping {self.name} because it always exits: {self.always_exits}") return False if len(self.active_caches) > 1: # print(f"Skipping {self.name} because it has >1 cache entries") diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index c4f5ee29ef41af..a721cba0247a8a 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -1,6 +1,7 @@ import dataclasses import typing +from flags import variable_used_unspecialized from formatting import ( Formatter, UNUSED, @@ -314,7 +315,7 @@ def write_macro_instr( write_components(parts, out, TIER_ONE, mac.cache_offset) except AssertionError as err: raise AssertionError(f"Error writing macro {mac.name}") from err - if not parts[-1].instr.always_exits: + if not parts[-1].instr.always_exits and not parts[-1].instr.save_frame_state: if mac.cache_offset: out.emit(f"next_instr += {mac.cache_offset};") out.emit("DISPATCH();") @@ -366,12 +367,7 @@ def write_components( poke.as_stack_effect(lax=True), ) - dispatch_inlined_special_case = ( - mgr is managers[-1] - and 
mgr.instr.always_exits.startswith("DISPATCH_INLINED") - and mgr.instr.name == "_PUSH_FRAME" - ) - if dispatch_inlined_special_case: + if mgr.instr.save_frame_state: # Adjust stack to min_offset (input effects materialized) out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high) # Use clone() since adjust_inverse() mutates final_offset @@ -385,7 +381,7 @@ def write_components( with out.block(""): mgr.instr.write_body(out, -4, mgr.active_caches, tier) - if mgr is managers[-1] and not dispatch_inlined_special_case: + if mgr is managers[-1] and not mgr.instr.save_frame_state: # TODO: Explain why this adjustment is needed. out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high) # Use clone() since adjust_inverse() mutates final_offset From 94630d49b3283657953d44b7fbe2a8a36d624721 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Aug 2023 14:45:31 -0700 Subject: [PATCH 17/19] Stop special-casing _PUSH_FRAME altogether Instead, we special-case SAVE_IP: - Its Tier 2 expansion sets oparg to the instruction offset - In Tier 1 it is a no-op (and skipped if present in a macro) --- Python/bytecodes.c | 1 + Tools/cases_generator/generate_cases.py | 12 ++++-------- Tools/cases_generator/stacking.py | 16 ++++++++++++++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9919c4141c9099..cbbd6d48daae75 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3005,6 +3005,7 @@ dummy_func( _CHECK_FUNCTION_EXACT_ARGS + _CHECK_STACK_SPACE + _INIT_CALL_PY_EXACT_ARGS + + SAVE_IP + // Tier 2 only; special-cased oparg _PUSH_FRAME; inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 2a695c45684cc1..6050fcf84479e9 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -443,13 +443,6 @@ def 
write_macro_expansions( expansions: list[tuple[str, int, int]] = [] # [(name, size, offset), ...] for part in parts: if isinstance(part, Component): - # _PUSH_FRAME is super special; it expands to SAVE_IP(next_instr) + _PUSH_FRAME - if part.instr.name == "_PUSH_FRAME": - expansions.append( - ("SAVE_IP", OPARG_SIZES["OPARG_SAVE_IP"], cache_offset) - ) - expansions.append(("_PUSH_FRAME", OPARG_SIZES["OPARG_FULL"], 0)) - continue # All component instructions must be viable uops if not part.instr.is_viable_uop(): # This note just reminds us about macros that cannot @@ -463,7 +456,10 @@ def write_macro_expansions( ) return if not part.active_caches: - size, offset = OPARG_SIZES["OPARG_FULL"], 0 + if part.instr.name == "SAVE_IP": + size, offset = OPARG_SIZES["OPARG_SAVE_IP"], cache_offset + else: + size, offset = OPARG_SIZES["OPARG_FULL"], 0 else: # If this assert triggers, is_viable_uops() lied assert len(part.active_caches) == 1, (name, part.instr.name) diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index a721cba0247a8a..7154d8fbab454f 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -147,6 +147,8 @@ class EffectManager: # Track offsets from stack pointer min_offset: StackOffset final_offset: StackOffset + # Link to previous manager + pred: "EffectManager | None" = None def __init__( self, @@ -168,7 +170,8 @@ def __init__( self.pokes.append(StackItem(offset=self.final_offset.clone(), effect=eff)) self.final_offset.higher(eff) - if pred: + self.pred = pred + while pred: # Replace push(x) + pop(y) with copy(x, y). # Check that the sources and destinations are disjoint. sources: set[str] = set() @@ -193,6 +196,11 @@ def __init__( sources, destinations, ) + # See if we can get more copies of a earlier predecessor. 
+ if self.peeks and not pred.pokes and not pred.peeks: + pred = pred.pred + else: + pred = None # Break def adjust_deeper(self, eff: StackEffect) -> None: for peek in self.peeks: @@ -305,7 +313,11 @@ def write_single_instr( def write_macro_instr( mac: MacroInstruction, out: Formatter, family: Family | None ) -> None: - parts = [part for part in mac.parts if isinstance(part, Component)] + parts = [ + part + for part in mac.parts + if isinstance(part, Component) and part.instr.name != "SAVE_IP" + ] out.emit("") with out.block(f"TARGET({mac.name})"): if mac.predicted: From cf8e2c02d191d3828366b24c81ee4b4c213d2ef0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 15 Aug 2023 13:20:38 -0700 Subject: [PATCH 18/19] Call _Py_EnterRecursivePy in _FRAME_PUSH --- Python/bytecodes.c | 1 + Python/ceval.c | 5 ----- Python/ceval_macros.h | 5 +++++ Python/executor.c | 1 + Python/executor_cases.c.h | 1 + Python/generated_cases.c.h | 1 + 6 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index cbbd6d48daae75..0ad08aafd0ee2a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2994,6 +2994,7 @@ dummy_func( #endif #if TIER_TWO frame = tstate->cframe->current_frame = new_frame; + ERROR_IF(_Py_EnterRecursivePy(tstate), xz); stack_pointer = _PyFrame_GetStackPointer(frame); ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; #endif diff --git a/Python/ceval.c b/Python/ceval.c index 2370636d765d9c..26e741ed7c7547 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -602,11 +602,6 @@ int _Py_CheckRecursiveCallPy( return 0; } -static inline int _Py_EnterRecursivePy(PyThreadState *tstate) { - return (tstate->py_recursion_remaining-- <= 0) && - _Py_CheckRecursiveCallPy(tstate); -} - static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { tstate->py_recursion_remaining++; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 77f760f0bb5995..67afd5c12d5ce3 100644 --- 
a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -369,3 +369,8 @@ static const convertion_func_ptr CONVERSION_FUNCTIONS[4] = { #else #define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) #endif + +static inline int _Py_EnterRecursivePy(PyThreadState *tstate) { + return (tstate->py_recursion_remaining-- <= 0) && + _Py_CheckRecursiveCallPy(tstate); +} diff --git a/Python/executor.c b/Python/executor.c index c18ba6d4ade3e5..1aa0578b5f899c 100644 --- a/Python/executor.c +++ b/Python/executor.c @@ -115,6 +115,7 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject pop_2_error: STACK_SHRINK(1); pop_1_error: +pop_1_exit_unwind: STACK_SHRINK(1); error: // On ERROR_IF we return NULL as the frame. diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4ace06857eb424..15c7e5b6fd0589 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2216,6 +2216,7 @@ #endif #if TIER_TWO frame = tstate->cframe->current_frame = new_frame; + if (_Py_EnterRecursivePy(tstate)) goto pop_1_exit_unwind; stack_pointer = _PyFrame_GetStackPointer(frame); ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; #endif diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index a42457d05faf03..910a10048d9919 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3820,6 +3820,7 @@ #endif #if TIER_TWO frame = tstate->cframe->current_frame = new_frame; + if (_Py_EnterRecursivePy(tstate)) goto pop_1_exit_unwind; stack_pointer = _PyFrame_GetStackPointer(frame); ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; #endif From 05af84805a04747c1de7a5b843041b8d55c31bee Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 16 Aug 2023 12:04:23 -0700 Subject: [PATCH 19/19] Introduce SAVE_CURRENT_IP uop per Mark's proposal --- Include/internal/pycore_opcode_metadata.h | 10 ++++++++-- Python/abstract_interp_cases.c.h | 4 ++++ 
Python/bytecodes.c | 13 ++++++++++++- Python/ceval_macros.h | 9 ++------- Python/executor.c | 8 -------- Python/executor_cases.c.h | 13 ++++++++++++- Python/generated_cases.c.h | 14 ++++++++++++-- Tools/cases_generator/flags.py | 2 +- Tools/cases_generator/instructions.py | 4 +--- Tools/cases_generator/stacking.py | 16 +++++++++++----- 10 files changed, 63 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 4b6f274973e848..9fcf48fa71f88f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -60,7 +60,8 @@ #define _POP_JUMP_IF_FALSE 336 #define _POP_JUMP_IF_TRUE 337 #define JUMP_TO_TOP 338 -#define INSERT 339 +#define SAVE_CURRENT_IP 339 +#define INSERT 340 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -576,6 +577,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case SAVE_IP: return 0; + case SAVE_CURRENT_IP: + return 0; case EXIT_TRACE: return 0; case INSERT: @@ -1100,6 +1103,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case SAVE_IP: return 0; + case SAVE_CURRENT_IP: + return 0; case EXIT_TRACE: return 0; case INSERT: @@ -1501,7 +1506,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, [WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } }, [PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } }, - [CALL_PY_EXACT_ARGS] = { .nuops = 6, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, + [CALL_PY_EXACT_ARGS] = { .nuops = 7, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { 
_INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, [CALL_NO_KW_TYPE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TYPE_1, 0, 0 } } }, [CALL_NO_KW_STR_1] = { .nuops = 1, .uops = { { CALL_NO_KW_STR_1, 0, 0 } } }, [CALL_NO_KW_TUPLE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TUPLE_1, 0, 0 } } }, @@ -1567,6 +1572,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_POP_JUMP_IF_FALSE] = "_POP_JUMP_IF_FALSE", [_POP_JUMP_IF_TRUE] = "_POP_JUMP_IF_TRUE", [JUMP_TO_TOP] = "JUMP_TO_TOP", + [SAVE_CURRENT_IP] = "SAVE_CURRENT_IP", [INSERT] = "INSERT", }; #endif // NEED_OPCODE_METADATA diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index ca3c274e87b691..eef071119bcd84 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -775,6 +775,10 @@ break; } + case SAVE_CURRENT_IP: { + break; + } + case EXIT_TRACE: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index dc6ac36f4dfb3f..9d28c4ce5f2837 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2991,7 +2991,7 @@ dummy_func( // Eventually this should be the only occurrence of this code. 
frame->return_offset = 0; assert(tstate->interp->eval_frame == NULL); - SAVE_FRAME_STATE(); // Signals to the code generator + _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = frame; CALL_STAT_INC(inlined_py_calls); #if TIER_ONE @@ -3013,6 +3013,7 @@ dummy_func( _CHECK_STACK_SPACE + _INIT_CALL_PY_EXACT_ARGS + SAVE_IP + // Tier 2 only; special-cased oparg + SAVE_CURRENT_IP + // Sets frame->prev_instr _PUSH_FRAME; inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { @@ -3775,6 +3776,16 @@ dummy_func( frame->prev_instr = ip_offset + oparg; } + op(SAVE_CURRENT_IP, (--)) { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + } + op(EXIT_TRACE, (--)) { frame->prev_instr--; // Back up to just before destination _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 67afd5c12d5ce3..5e2db1e0b394e6 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -103,16 +103,11 @@ DISPATCH_GOTO(); \ } -#define SAVE_FRAME_STATE() \ - do { \ - frame->prev_instr = next_instr - 1; \ - _PyFrame_SetStackPointer(frame, stack_pointer); \ - } while (0) - #define DISPATCH_INLINED(NEW_FRAME) \ do { \ assert(tstate->interp->eval_frame == NULL); \ - SAVE_FRAME_STATE(); \ + _PyFrame_SetStackPointer(frame, stack_pointer); \ + frame->prev_instr = next_instr - 1; \ (NEW_FRAME)->previous = frame; \ frame = cframe.current_frame = (NEW_FRAME); \ CALL_STAT_INC(inlined_py_calls); \ diff --git a/Python/executor.c b/Python/executor.c index 1aa0578b5f899c..5a571e6da4673f 100644 --- a/Python/executor.c +++ b/Python/executor.c @@ -30,14 +30,6 @@ #undef ENABLE_SPECIALIZATION #define ENABLE_SPECIALIZATION 0 -#undef SAVE_FRAME_STATE -#define SAVE_FRAME_STATE() \ - do { \ - /* Assume preceding SAVE_IP has set frame->prev_instr */ \ - frame->prev_instr--; \ - 
_PyFrame_SetStackPointer(frame, stack_pointer); \ - } while (0) - _PyInterpreterFrame * _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7e58c3f7b4d236..b3dd3133530562 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2209,7 +2209,7 @@ // Eventually this should be the only occurrence of this code. frame->return_offset = 0; assert(tstate->interp->eval_frame == NULL); - SAVE_FRAME_STATE(); // Signals to the code generator + _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = frame; CALL_STAT_INC(inlined_py_calls); #if TIER_ONE @@ -2795,6 +2795,17 @@ break; } + case SAVE_CURRENT_IP: { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + break; + } + case EXIT_TRACE: { frame->prev_instr--; // Back up to just before destination _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4628bbf6dc3efe..11d560a6e77adf 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3808,16 +3808,26 @@ new_frame->localsplus[i] = args[i]; } } + // SAVE_CURRENT_IP + next_instr += 3; + { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + } // _PUSH_FRAME STACK_SHRINK(oparg); STACK_SHRINK(2); - next_instr += 3; { // Write it out explicitly because it's subtly different. // Eventually this should be the only occurrence of this code. 
frame->return_offset = 0; assert(tstate->interp->eval_frame == NULL); - SAVE_FRAME_STATE(); // Signals to the code generator + _PyFrame_SetStackPointer(frame, stack_pointer); new_frame->previous = frame; CALL_STAT_INC(inlined_py_calls); #if TIER_ONE diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index f7ebdeb0d65677..962f003b194dbd 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -92,7 +92,7 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool: if text == "#if": if ( i + 1 < len(node.tokens) - and node.tokens[i + 1].text == "ENABLE_SPECIALIZATION" + and node.tokens[i + 1].text in ("ENABLE_SPECIALIZATION", "TIER_ONE") ): skipping = True elif text in ("#else", "#endif"): diff --git a/Tools/cases_generator/instructions.py b/Tools/cases_generator/instructions.py index 7dabff942f4769..9143ae0db7be81 100644 --- a/Tools/cases_generator/instructions.py +++ b/Tools/cases_generator/instructions.py @@ -60,7 +60,6 @@ class Instruction: # Computed by constructor always_exits: str # If the block always exits, its last line; else "" - save_frame_state: bool # Whether the instruction uses SAVE_FRAME_STATE() has_deopt: bool cache_offset: int cache_effects: list[parsing.CacheEffect] @@ -84,7 +83,6 @@ def __init__(self, inst: parsing.InstDef): self.block ) self.always_exits = always_exits(self.block_text) - self.save_frame_state = variable_used(self.inst, "SAVE_FRAME_STATE") self.has_deopt = variable_used(self.inst, "DEOPT_IF") self.cache_effects = [ effect for effect in inst.inputs if isinstance(effect, parsing.CacheEffect) @@ -122,7 +120,7 @@ def __init__(self, inst: parsing.InstDef): def is_viable_uop(self) -> bool: """Whether this instruction is viable as a uop.""" dprint: typing.Callable[..., None] = lambda *args, **kwargs: None - if "PUSH_FRAME" in self.name: + if "FRAME" in self.name: dprint = print if self.name == "EXIT_TRACE": diff --git a/Tools/cases_generator/stacking.py 
b/Tools/cases_generator/stacking.py index 2067557faea7d3..8361eb99f88a7c 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -324,10 +324,10 @@ def write_macro_instr( out.emit(f"PREDICTED({mac.name});") out.static_assert_family_size(mac.name, family, mac.cache_offset) try: - write_components(parts, out, TIER_ONE, mac.cache_offset) + next_instr_is_set = write_components(parts, out, TIER_ONE, mac.cache_offset) except AssertionError as err: raise AssertionError(f"Error writing macro {mac.name}") from err - if not parts[-1].instr.always_exits and not parts[-1].instr.save_frame_state: + if not parts[-1].instr.always_exits and not next_instr_is_set: if mac.cache_offset: out.emit(f"next_instr += {mac.cache_offset};") out.emit("DISPATCH();") @@ -338,7 +338,7 @@ def write_components( out: Formatter, tier: Tiers, cache_offset: int, -) -> None: +) -> bool: managers = get_managers(parts) all_vars: dict[str, StackEffect] = {} @@ -359,6 +359,7 @@ def write_components( for name, eff in all_vars.items(): out.declare(eff, None) + next_instr_is_set = False for mgr in managers: if len(parts) > 1: out.emit(f"// {mgr.instr.name}") @@ -379,11 +380,14 @@ def write_components( poke.as_stack_effect(lax=True), ) - if mgr.instr.save_frame_state: + if mgr.instr.name == "_PUSH_FRAME": # Adjust stack to min_offset (input effects materialized) out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high) # Use clone() since adjust_inverse() mutates final_offset mgr.adjust_inverse(mgr.final_offset.clone()) + + if mgr.instr.name == "SAVE_CURRENT_IP": + next_instr_is_set = True if cache_offset: out.emit(f"next_instr += {cache_offset};") @@ -393,7 +397,7 @@ def write_components( with out.block(""): mgr.instr.write_body(out, -4, mgr.active_caches, tier) - if mgr is managers[-1] and not mgr.instr.save_frame_state: + if mgr is managers[-1] and not next_instr_is_set: # TODO: Explain why this adjustment is needed. 
out.stack_adjust(mgr.final_offset.deep, mgr.final_offset.high) # Use clone() since adjust_inverse() mutates final_offset @@ -406,6 +410,8 @@ def write_components( poke.effect, ) + return next_instr_is_set + def write_single_instr_for_abstract_interp( instr: Instruction, out: Formatter pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy