From 90d38d7611fc5853c66573565055c2b0ef99d0c9 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Wed, 21 May 2025 21:32:45 +0200 Subject: [PATCH 1/9] Optimize _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW --- Include/internal/pycore_uop_ids.h | 91 ++++++++++---------- Include/internal/pycore_uop_metadata.h | 26 +++++- Lib/test/test_capi/test_opt.py | 21 ++++- Python/bytecodes.c | 42 +++++++++- Python/executor_cases.c.h | 110 +++++++++++++++++++++++++ Python/optimizer_analysis.c | 34 ++++++++ Python/optimizer_bytecodes.c | 8 ++ Python/optimizer_cases.c.h | 38 +++++++++ 8 files changed, 317 insertions(+), 53 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index d6c2ba59db1eda..d08799487fda42 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -265,75 +265,80 @@ extern "C" { #define _MONITOR_JUMP_BACKWARD 483 #define _MONITOR_RESUME 484 #define _NOP NOP -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 485 +#define _POP_CALL 485 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486 +#define _POP_CALL_ONE 487 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488 +#define _POP_CALL_TWO 489 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490 #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 486 -#define _POP_JUMP_IF_TRUE 487 +#define _POP_JUMP_IF_FALSE 491 +#define _POP_JUMP_IF_TRUE 492 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 488 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 489 -#define _POP_TWO 490 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 491 +#define _POP_TOP_LOAD_CONST_INLINE 493 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494 +#define _POP_TWO 495 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 492 +#define _PUSH_FRAME 497 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 493 -#define _PY_FRAME_GENERAL 494 -#define _PY_FRAME_KW 495 -#define _QUICKEN_RESUME 496 -#define _REPLACE_WITH_TRUE 497 +#define _PUSH_NULL_CONDITIONAL 498 +#define _PY_FRAME_GENERAL 499 +#define _PY_FRAME_KW 500 +#define _QUICKEN_RESUME 501 +#define _REPLACE_WITH_TRUE 502 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 498 -#define _SEND 499 -#define _SEND_GEN_FRAME 500 +#define _SAVE_RETURN_OFFSET 503 +#define _SEND 504 +#define _SEND_GEN_FRAME 505 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 501 -#define _STORE_ATTR 502 -#define _STORE_ATTR_INSTANCE_VALUE 503 -#define _STORE_ATTR_SLOT 504 -#define _STORE_ATTR_WITH_HINT 505 +#define _START_EXECUTOR 506 +#define _STORE_ATTR 507 +#define _STORE_ATTR_INSTANCE_VALUE 508 +#define _STORE_ATTR_SLOT 509 +#define _STORE_ATTR_WITH_HINT 510 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 506 -#define _STORE_FAST_0 507 -#define _STORE_FAST_1 508 -#define _STORE_FAST_2 509 -#define _STORE_FAST_3 510 -#define _STORE_FAST_4 511 -#define _STORE_FAST_5 512 -#define _STORE_FAST_6 513 -#define _STORE_FAST_7 514 +#define _STORE_FAST 511 +#define _STORE_FAST_0 512 +#define _STORE_FAST_1 513 +#define _STORE_FAST_2 514 +#define _STORE_FAST_3 515 +#define _STORE_FAST_4 516 +#define _STORE_FAST_5 517 +#define _STORE_FAST_6 518 +#define _STORE_FAST_7 519 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 515 -#define _STORE_SUBSCR 516 -#define _STORE_SUBSCR_DICT 517 -#define _STORE_SUBSCR_LIST_INT 518 +#define _STORE_SLICE 520 +#define _STORE_SUBSCR 521 +#define _STORE_SUBSCR_DICT 522 +#define _STORE_SUBSCR_LIST_INT 523 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 519 -#define _TO_BOOL 520 +#define _TIER2_RESUME_CHECK 524 +#define _TO_BOOL 525 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 521 +#define _TO_BOOL_LIST 526 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 522 +#define _TO_BOOL_STR 527 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 523 -#define _UNPACK_SEQUENCE_LIST 524 -#define _UNPACK_SEQUENCE_TUPLE 525 -#define _UNPACK_SEQUENCE_TWO_TUPLE 526 +#define _UNPACK_SEQUENCE 528 +#define _UNPACK_SEQUENCE_LIST 529 +#define _UNPACK_SEQUENCE_TUPLE 530 +#define _UNPACK_SEQUENCE_TWO_TUPLE 531 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 526 +#define MAX_UOP_ID 531 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 725238228a3dbc..5ebe124983b862 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -303,9 +303,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, - [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_CALL] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO] = HAS_ESCAPES_FLAG, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, [_LOAD_CONST_UNDER_INLINE] = 0, [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, @@ -557,6 +562,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_MAYBE_EXPAND_METHOD] = "_MAYBE_EXPAND_METHOD", [_MAYBE_EXPAND_METHOD_KW] = "_MAYBE_EXPAND_METHOD_KW", [_NOP] = "_NOP", + [_POP_CALL] = "_POP_CALL", + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_ONE] = "_POP_CALL_ONE", + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_TWO] = "_POP_CALL_TWO", [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_TOP] = "_POP_TOP", @@ -1194,10 +1204,20 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _LOAD_CONST_INLINE_BORROW: return 0; + case _POP_CALL: + return 2; + case _POP_CALL_ONE: + return 3; + case _POP_CALL_TWO: + return 4; case _POP_TOP_LOAD_CONST_INLINE_BORROW: return 1; case _POP_TWO_LOAD_CONST_INLINE_BORROW: return 2; + case _POP_CALL_LOAD_CONST_INLINE_BORROW: + return 2; + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: + return 3; case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: return 4; case _LOAD_CONST_UNDER_INLINE: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0bc0e1b212b6b8..f19669f69512a9 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2002,7 +2002,11 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_GUARD_THIRD_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_ISINSTANCE", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + def test_call_list_append(self): def testfunc(n): @@ -2035,7 +2039,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_is_false(self): def testfunc(n): @@ -2053,7 +2060,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_FALSE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_subclass(self): def testfunc(n): @@ -2071,7 +2081,10 @@ def testfunc(n): self.assertNotIn("_CALL_ISINSTANCE", uops) self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_GUARD_IS_TRUE_POP", uops) - self.assertIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) + self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) def test_call_isinstance_unknown_object(self): def testfunc(n): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 652bda9c182e49..be22c7446f5402 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5287,18 +5287,54 @@ dummy_func( value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op (_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { + tier2 op(_POP_CALL, (callable, null --)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_CALL_ONE, (callable, null, pop --)) { + PyStackRef_CLOSE(pop); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_CALL_TWO, (callable, null, pop1, pop2 --)) { + PyStackRef_CLOSE(pop2); + PyStackRef_CLOSE(pop1); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + } + + tier2 op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { PyStackRef_CLOSE(pop); value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { + tier2 op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); value = PyStackRef_FromPyObjectBorrow(ptr); } - tier2 pure op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { + tier2 op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null -- value)) { + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop -- value)) { + PyStackRef_CLOSE(pop); + (void)null; // Silence compiler warnings about unused variables + DEAD(null); + PyStackRef_CLOSE(callable); + value = PyStackRef_FromPyObjectBorrow(ptr); + } + + tier2 op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, pop1, pop2 -- value)) { PyStackRef_CLOSE(pop2); PyStackRef_CLOSE(pop1); (void)null; // Silence compiler warnings about unused variables diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fcde31a30126a4..40090e692e4a72 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7030,6 +7030,69 @@ break; } + case _POP_CALL: { + _PyStackRef null; + _PyStackRef callable; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_ONE: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + + case _POP_CALL_TWO: { + _PyStackRef pop2; + _PyStackRef pop1; + _PyStackRef null; + _PyStackRef callable; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + null = stack_pointer[-3]; + callable = stack_pointer[-4]; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop2); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop1); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { _PyStackRef pop; _PyStackRef value; @@ -7071,6 +7134,53 @@ break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + null = stack_pointer[-1]; + callable = stack_pointer[-2]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + _PyStackRef pop; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef value; + pop = stack_pointer[-1]; + null = stack_pointer[-2]; + callable = stack_pointer[-3]; + PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(pop); + stack_pointer = _PyFrame_GetStackPointer(frame); + (void)null; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(callable); + stack_pointer = _PyFrame_GetStackPointer(frame); + value = PyStackRef_FromPyObjectBorrow(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { _PyStackRef pop2; _PyStackRef pop1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5c50228a13b2d1..c129177c4fdd03 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -552,11 +552,13 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = { [_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP, [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_TWO, }; const bool op_skip[MAX_UOP_ID + 1] = { [_NOP] = true, [_CHECK_VALIDITY] = true, + [_SET_IP] = true, }; const uint16_t op_without_pop[MAX_UOP_ID + 1] = { @@ -565,6 +567,14 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW, [_POP_TWO] = _POP_TOP, [_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_LOAD_CONST_INLINE_BORROW, + [_POP_CALL_TWO] = _POP_CALL_ONE, + [_POP_CALL_ONE] = _POP_CALL, + // The following two instructions are handled separately in remove_unneeded_uops. + // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. + // [_POP_CALL] = _POP_TOP, + // [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; @@ -601,6 +611,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP + again: while (op_without_pop[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { @@ -616,6 +627,29 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) pc = last - buffer; } } + // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. + // This looks for a preceding _PUSH_NULL instruction and simplifies to _POP_TOP. + if (opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { + _PyUOpInstruction *last = &buffer[pc - 1]; + while (op_skip[last->opcode]) { + last--; + } + if (last->opcode == _PUSH_NULL) { + last->opcode = _NOP; + if (opcode == _POP_CALL) { + opcode = buffer[pc].opcode = _POP_TOP; + } + else { + opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; + } + opcode = buffer[pc].opcode = _POP_TOP; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; + } + goto again; + } + } /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ bool needs_ip = opcode == _PUSH_FRAME; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 0b6bbd133d6ac9..49c6bfb6c1b01a 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -551,6 +551,14 @@ dummy_func(void) { value = sym_new_const(ctx, ptr); } + op(_POP_CALL_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused -- value)) { + value = sym_new_const(ctx, ptr); + } + + op(_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused -- value)) { + value = sym_new_const(ctx, ptr); + } + op(_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, unused, unused, unused, unused -- value)) { value = sym_new_const(ctx, ptr); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5a9fcf3b1b6924..bf7ac72d4579e7 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2601,6 +2601,24 @@ break; } + case _POP_CALL: { + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE: { + stack_pointer += -3; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_TWO: { + stack_pointer += -4; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TOP_LOAD_CONST_INLINE_BORROW: { JitOptSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; @@ -2618,6 +2636,26 @@ break; } + case _POP_CALL_LOAD_CONST_INLINE_BORROW: { + JitOptSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = sym_new_const(ctx, ptr); + stack_pointer[-2] = value; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: { + JitOptSymbol *value; + PyObject *ptr = (PyObject *)this_instr->operand0; + value = sym_new_const(ctx, ptr); + stack_pointer[-3] = value; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: { JitOptSymbol *value; PyObject *ptr = (PyObject *)this_instr->operand0; From 0e66db44fb0f0900bd98d92b8ebfac9e24d9a5e3 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 17:49:46 +0200 Subject: [PATCH 2/9] Add news entry --- .../2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst new file mode 100644 index 00000000000000..fdb6a2fe0b383b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-22-17-49-39.gh-issue-131798.U6ZmFm.rst @@ -0,0 +1 @@ +Optimize ``_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW``. From 0e1030bd1bf9d131530f4446f6e81696f87499b4 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 17:54:10 +0200 Subject: [PATCH 3/9] Trailing whitespace --- Python/optimizer_analysis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index c129177c4fdd03..938e0a9583f4f2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -572,7 +572,7 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_CALL_TWO] = _POP_CALL_ONE, [_POP_CALL_ONE] = _POP_CALL, // The following two instructions are handled separately in remove_unneeded_uops. - // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. + // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. // [_POP_CALL] = _POP_TOP, // [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; From af8864687ac5bf1c8e717d1a8c2d7df88631230a Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:02:12 +0200 Subject: [PATCH 4/9] Simplify logic --- Python/optimizer_analysis.c | 39 +++++++++++++------------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 938e0a9583f4f2..061b4c3fa20060 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -611,30 +611,23 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - again: - while (op_without_pop[opcode]) { + while (op_without_pop[opcode] || opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; } - if (!op_without_push[last->opcode]) { - break; - } - last->opcode = op_without_push[last->opcode]; - opcode = buffer[pc].opcode = op_without_pop[opcode]; - if (op_without_pop[last->opcode]) { - opcode = last->opcode; - pc = last - buffer; - } - } - // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. - // This looks for a preceding _PUSH_NULL instruction and simplifies to _POP_TOP. - if (opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { - _PyUOpInstruction *last = &buffer[pc - 1]; - while (op_skip[last->opcode]) { - last--; + if (op_without_push[last->opcode]) { + last->opcode = op_without_push[last->opcode]; + opcode = buffer[pc].opcode = op_without_pop[opcode]; + if (op_without_pop[last->opcode]) { + opcode = last->opcode; + pc = last - buffer; + } } - if (last->opcode == _PUSH_NULL) { + else if (last->opcode == _PUSH_NULL) { + // Handle _POP_CALL and _POP_CALL_LOAD_CONST_INLINE_BORROW separately. + // This looks for a preceding _PUSH_NULL instruction and + // simplifies to _POP_TOP(_LOAD_CONST_INLINE_BORROW). last->opcode = _NOP; if (opcode == _POP_CALL) { opcode = buffer[pc].opcode = _POP_TOP; @@ -642,12 +635,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) else { opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; } - opcode = buffer[pc].opcode = _POP_TOP; - if (op_without_pop[last->opcode]) { - opcode = last->opcode; - pc = last - buffer; - } - goto again; + } else { + break; } } /* _PUSH_FRAME doesn't escape or error, but it From 3ed47d286f1d5649022b08acc63ef8e04a3f33f7 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:05:17 +0200 Subject: [PATCH 5/9] Wrap line --- Python/optimizer_analysis.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 061b4c3fa20060..5b2923a54ae9b7 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -611,7 +611,9 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - while (op_without_pop[opcode] || opcode == _POP_CALL || opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { + while (op_without_pop[opcode] || + opcode == _POP_CALL || + opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; From bf2786780efae4c9401f7873e120467f132c37be Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:06:40 +0200 Subject: [PATCH 6/9] PEP7 --- Python/optimizer_analysis.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 5b2923a54ae9b7..451c633ac1fe37 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -613,7 +613,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _NOP + _POP_TOP + _NOP while (op_without_pop[opcode] || opcode == _POP_CALL || - opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) { + opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) + { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; From 2f90609602b0de1894fd43e31d7030808132e6c6 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:07:27 +0200 Subject: [PATCH 7/9] PEP7 vol. 2 --- Python/optimizer_analysis.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 451c633ac1fe37..aa50831af959cf 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -638,7 +638,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) else { opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; } - } else { + } + else { break; } } From 5db29d8974bfc665db5ea081b880917ffeb23979 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:11:21 +0200 Subject: [PATCH 8/9] Simplify logic --- Python/optimizer_analysis.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index aa50831af959cf..99e25591b51892 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -571,10 +571,11 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = { [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = _POP_CALL_LOAD_CONST_INLINE_BORROW, [_POP_CALL_TWO] = _POP_CALL_ONE, [_POP_CALL_ONE] = _POP_CALL, - // The following two instructions are handled separately in remove_unneeded_uops. - // The TOS for both is null so calling _POP_TOP (which closes the stackref) is not safe. - // [_POP_CALL] = _POP_TOP, - // [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, +}; + +const uint16_t op_without_pop_null[MAX_UOP_ID + 1] = { + [_POP_CALL] = _POP_TOP, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW, }; @@ -611,9 +612,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - while (op_without_pop[opcode] || - opcode == _POP_CALL || - opcode == _POP_CALL_LOAD_CONST_INLINE_BORROW) + while (op_without_pop[opcode] || op_without_pop_null[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { @@ -632,12 +631,8 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // This looks for a preceding _PUSH_NULL instruction and // simplifies to _POP_TOP(_LOAD_CONST_INLINE_BORROW). last->opcode = _NOP; - if (opcode == _POP_CALL) { - opcode = buffer[pc].opcode = _POP_TOP; - } - else { - opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW; - } + opcode = buffer[pc].opcode = op_without_pop_null[opcode]; + assert(opcode); } else { break; From efbded1d012b82e148f62e8598586ccbdc5f6bdf Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Thu, 22 May 2025 18:12:40 +0200 Subject: [PATCH 9/9] Style fixes --- Lib/test/test_capi/test_opt.py | 1 - Python/optimizer_analysis.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f19669f69512a9..cb6eae484149ee 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2007,7 +2007,6 @@ def testfunc(n): self.assertNotIn("_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", uops) self.assertNotIn("_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", uops) - def test_call_list_append(self): def testfunc(n): a = [] diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 99e25591b51892..851e1efa0497af 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -612,8 +612,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) // _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP // ...becomes: // _NOP + _POP_TOP + _NOP - while (op_without_pop[opcode] || op_without_pop_null[opcode]) - { + while (op_without_pop[opcode] || op_without_pop_null[opcode]) { _PyUOpInstruction *last = &buffer[pc - 1]; while (op_skip[last->opcode]) { last--; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy