diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 7be1c9eebb3bf9..c796b0dd4b5b7e 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -862,7 +862,8 @@ def testfunc(n): uops = get_opnames(ex) self.assertNotIn("_GUARD_TOS_INT", uops) self.assertNotIn("_GUARD_NOS_INT", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) # Try again, but between the runs, set the global to a float. # This should result in no executor the second time. ns = {} @@ -1462,7 +1463,8 @@ def testfunc(n): self.assertEqual(res, 3) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) self.assertNotIn("_GUARD_NOS_INT", uops) self.assertNotIn("_GUARD_TOS_INT", uops) @@ -1612,7 +1614,7 @@ def f(n): # But all of the appends we care about are still there: self.assertEqual(uops.count("_CALL_LIST_APPEND"), len("ABCDEFG")) - def test_compare_pop_two_load_const_inline_borrow(self): + def test_compare_op_int_pop_two_load_const_inline_borrow(self): def testfunc(n): x = 0 for _ in range(n): @@ -1629,6 +1631,40 @@ def testfunc(n): self.assertNotIn("_COMPARE_OP_INT", uops) self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) + def test_compare_op_str_pop_two_load_const_inline_borrow(self): + def testfunc(n): + x = 0 + for _ in range(n): + a = "foo" + b = "foo" + if a == b: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_COMPARE_OP_STR", uops) + self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) + + def test_compare_op_float_pop_two_load_const_inline_borrow(self): + def testfunc(n): + x = 0 + for _ in range(n): + a = 1.0 + b = 1.0 + if a == b: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_COMPARE_OP_FLOAT", uops) + self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) + def test_to_bool_bool_contains_op_set(self): """ Test that _TO_BOOL_BOOL is removed from code like: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-24-02-13-59.gh-issue-132732.p77xkb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-24-02-13-59.gh-issue-132732.p77xkb.rst new file mode 100644 index 00000000000000..df77fe6715d812 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-24-02-13-59.gh-issue-132732.p77xkb.rst @@ -0,0 +1 @@ +Optimize constant comparison for ``_COMPARE_OP_INT``, ``_COMPARE_OP_FLOAT`` and ``_COMPARE_OP_STR`` in JIT builds diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index fab6fef5ccda10..dd3e49b83d9971 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -28,6 +28,7 @@ #include "pycore_range.h" #include "pycore_unicodeobject.h" #include "pycore_ceval.h" +#include "pycore_floatobject.h" #include #include diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index aeff76affd8ace..77759f67532f80 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -430,31 +430,17 @@ dummy_func(void) { } op(_COMPARE_OP_INT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *tmp = PyObject_RichCompare(sym_get_const(ctx, left), - sym_get_const(ctx, right), - oparg >> 5); - if (tmp == NULL) { - goto error; - } - assert(PyBool_Check(tmp)); - assert(_Py_IsImmortal(tmp)); - REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)tmp); - res = sym_new_const(ctx, tmp); - Py_DECREF(tmp); - } - else { - res = sym_new_type(ctx, &PyBool_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyBool_Type); } op(_COMPARE_OP_FLOAT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_type(ctx, &PyBool_Type); } op(_COMPARE_OP_STR, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_type(ctx, &PyBool_Type); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 41402200c1683e..99206f01618d79 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -436,6 +436,14 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -479,6 +487,14 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -522,6 +538,14 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -584,6 +608,14 @@ } /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -629,6 +661,14 @@ } /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -674,6 +714,14 @@ } /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -746,6 +794,14 @@ res_stackref = PyStackRef_FromPyObjectSteal(res_o); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1598,7 +1654,45 @@ } case _COMPARE_OP_FLOAT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + STAT_INC(COMPARE_OP, hit); + double dleft = PyFloat_AS_DOUBLE(left_o); + double dright = PyFloat_AS_DOUBLE(right_o); + int sign_ish = COMPARISON_BIT(dleft, dright); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); + res_stackref = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -1612,34 +1706,93 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyLong_CheckExact(sym_get_const(ctx, left))); - assert(PyLong_CheckExact(sym_get_const(ctx, right))); - PyObject *tmp = PyObject_RichCompare(sym_get_const(ctx, left), - sym_get_const(ctx, right), - oparg >> 5); - if (tmp == NULL) { - goto error; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(_PyLong_IsCompact((PyLongObject *)left_o)); + assert(_PyLong_IsCompact((PyLongObject *)right_o)); + STAT_INC(COMPARE_OP, hit); + assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && + _PyLong_DigitCount((PyLongObject *)right_o) <= 1); + Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left_o); + Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); + int sign_ish = COMPARISON_BIT(ileft, iright); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + res_stackref = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } } - assert(PyBool_Check(tmp)); - assert(_Py_IsImmortal(tmp)); - REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)tmp); - res = sym_new_const(ctx, tmp); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(tmp); - } - else { - res = sym_new_type(ctx, &PyBool_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyBool_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } case _COMPARE_OP_STR: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + STAT_INC(COMPARE_OP, hit); + int eq = _PyUnicode_Equal(left_o, right_o); + assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); + assert(eq == 0 || eq == 1); + assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); + assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS); + res_stackref = ((COMPARISON_NOT_EQUALS + eq) & oparg) ? PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = res; stack_pointer += -1; @@ -2730,6 +2883,14 @@ res_stackref = PyStackRef_FromPyObjectSteal(res_o); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index ea9dd836d98e22..b9985eaf48309d 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -232,6 +232,19 @@ def replace_opcode_if_evaluates_pure( emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n") else: emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n") + + if len(used_stack_inputs) == 2 and len(self.original_uop.stack.outputs) == 1: + outp = self.original_uop.stack.outputs[0] + if not outp.peek: + emitter.emit(f""" + if (sym_is_const(ctx, {outp.name})) {{ + PyObject *result = sym_get_const(ctx, {outp.name}); + if (_Py_IsImmortal(result)) {{ + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + }} + }}""") + storage.flush(self.out) emitter.emit("break;\n") emitter.emit("}\n") pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy