MAINT: simplify power fast path logic (#27901)

MaanasArora · web-flow · commit 52162afabd9b · 2025-01-07T14:55:58.000+01:00
* MAINT: remove fast paths from array power

* MAINT: Add fast paths to power loops

* MAINT: Clean loops for integer power in umath

* MAINT: Remove blocking regression test for power fast paths

* MAINT: Add helper function for power fast paths

* BUG: Replace mistyped bitwise AND (&) with logical AND (&&)

* BUG: Fix missing value on power helper return

* BUG: Fix exponent bitwise logic in power fast paths

* MAINT: Add power fast paths to floating point umath

* MAINT: Add fast power paths to array power when exponent is python object

* MAINT: Fix division by zero runtime warning in test regression

* MAINT: Adapt object regression test for linalg to power fast paths

* MAINT: Remove incorrect declarations in power fast paths

* MAINT: Reduce calls to power fast path helper when scalar is ineligible

* MAINT: Fix missing sliding loop

* BUG: Fix syntax error

* MAINT: Fix semantic misuse of -1 for non-error returns

* MAINT: Improve error checking in power fast paths to remove PyErr_Clear

* MAINT: Improve type checking in power fast paths

* MAINT: Efficient handling of ones arrays in scalar fast paths

* MAINT: Simplify outer check for scalar power fast paths

* MAINT: Reduce code duplication in float power fast paths and add reciprocal

* MAINT: Remove Python scalar checking for fast power paths

* MAINT: Add benchmarks for power operators in float binary bench

* MAINT: Add scalar power fast paths

* BUG: Add missing pointer cast

* BUG: Allow scalar power fast paths only for non-integers

* MAINT: Restore outdated changes in regression test to master
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
@@ -588,6 +588,12 @@ def time_pow_2(self, dtype):
     def time_pow_half(self, dtype):
         np.power(self.a, 0.5)
 
+    def time_pow_2_op(self, dtype):
+        self.a ** 2
+
+    def time_pow_half_op(self, dtype):
+        self.a ** 0.5
+
     def time_atan2(self, dtype):
         np.arctan2(self.a, self.b)
 
diff --git a/numpy/_core/src/multiarray/number.c b/numpy/_core/src/multiarray/number.c
@@ -328,165 +328,53 @@ array_inplace_matrix_multiply(PyArrayObject *self, PyObject *other)
     return res;
 }
 
-/*
- * Determine if object is a scalar and if so, convert the object
- * to a double and place it in the out_exponent argument
- * and return the "scalar kind" as a result.   If the object is
- * not a scalar (or if there are other error conditions)
- * return NPY_NOSCALAR, and out_exponent is undefined.
- */
-static NPY_SCALARKIND
-is_scalar_with_conversion(PyObject *o2, double* out_exponent)
+static int
+fast_scalar_power(PyObject *o1, PyObject *o2, int inplace, PyObject **result)
 {
-    PyObject *temp;
-    const int optimize_fpexps = 1;
-
-    if (PyLong_Check(o2)) {
-        long tmp = PyLong_AsLong(o2);
-        if (error_converting(tmp)) {
-            PyErr_Clear();
-            return NPY_NOSCALAR;
+    PyObject *fastop = NULL;
+    if (PyLong_CheckExact(o2)) {
+        int overflow = 0;
+        long exp = PyLong_AsLongAndOverflow(o2, &overflow);
+        if (overflow != 0) {
+            return -1;
         }
-        *out_exponent = (double)tmp;
-        return NPY_INTPOS_SCALAR;
-    }
 
-    if (optimize_fpexps && PyFloat_Check(o2)) {
-        *out_exponent = PyFloat_AsDouble(o2);
-        return NPY_FLOAT_SCALAR;
-    }
-
-    if (PyArray_Check(o2)) {
-        if ((PyArray_NDIM((PyArrayObject *)o2) == 0) &&
-                ((PyArray_ISINTEGER((PyArrayObject *)o2) ||
-                 (optimize_fpexps && PyArray_ISFLOAT((PyArrayObject *)o2))))) {
-            temp = Py_TYPE(o2)->tp_as_number->nb_float(o2);
-            if (temp == NULL) {
-                return NPY_NOSCALAR;
-            }
-            *out_exponent = PyFloat_AsDouble(o2);
-            Py_DECREF(temp);
-            if (PyArray_ISINTEGER((PyArrayObject *)o2)) {
-                return NPY_INTPOS_SCALAR;
-            }
-            else { /* ISFLOAT */
-                return NPY_FLOAT_SCALAR;
-            }
+        if (exp == -1) {
+            fastop = n_ops.reciprocal;
         }
-    }
-    else if (PyArray_IsScalar(o2, Integer) ||
-                (optimize_fpexps && PyArray_IsScalar(o2, Floating))) {
-        temp = Py_TYPE(o2)->tp_as_number->nb_float(o2);
-        if (temp == NULL) {
-            return NPY_NOSCALAR;
-        }
-        *out_exponent = PyFloat_AsDouble(o2);
-        Py_DECREF(temp);
-
-        if (PyArray_IsScalar(o2, Integer)) {
-                return NPY_INTPOS_SCALAR;
+        else if (exp == 2) {
+            fastop = n_ops.square;
         }
-        else { /* IsScalar(o2, Floating) */
-            return NPY_FLOAT_SCALAR;
+        else {
+            return 1;
         }
     }
-    else if (PyIndex_Check(o2)) {
-        PyObject* value = PyNumber_Index(o2);
-        Py_ssize_t val;
-        if (value == NULL) {
-            if (PyErr_Occurred()) {
-                PyErr_Clear();
-            }
-            return NPY_NOSCALAR;
+    else if (PyFloat_CheckExact(o2)) {
+        double exp = PyFloat_AsDouble(o2);
+        if (exp == 0.5) {
+            fastop = n_ops.sqrt;
         }
-        val = PyLong_AsSsize_t(value);
-        Py_DECREF(value);
-        if (error_converting(val)) {
-            PyErr_Clear();
-            return NPY_NOSCALAR;
+        else {
+            return 1;
         }
-        *out_exponent = (double) val;
-        return NPY_INTPOS_SCALAR;
     }
-    return NPY_NOSCALAR;
-}
+    else {
+        return 1;
+    }
 
-/*
- * optimize float array or complex array to a scalar power
- * returns 0 on success, -1 if no optimization is possible
- * the result is in value (can be NULL if an error occurred)
- */
-static int
-fast_scalar_power(PyObject *o1, PyObject *o2, int inplace,
-                  PyObject **value)
-{
-    double exponent;
-    NPY_SCALARKIND kind;   /* NPY_NOSCALAR is not scalar */
-
-    if (PyArray_Check(o1) &&
-            !PyArray_ISOBJECT((PyArrayObject *)o1) &&
-            ((kind=is_scalar_with_conversion(o2, &exponent))>0)) {
-        PyArrayObject *a1 = (PyArrayObject *)o1;
-        PyObject *fastop = NULL;
-        if (PyArray_ISFLOAT(a1) || PyArray_ISCOMPLEX(a1)) {
-            if (exponent == 1.0) {
-                fastop = n_ops.positive;
-            }
-            else if (exponent == -1.0) {
-                fastop = n_ops.reciprocal;
-            }
-            else if (exponent ==  0.0) {
-                fastop = n_ops._ones_like;
-            }
-            else if (exponent ==  0.5) {
-                fastop = n_ops.sqrt;
-            }
-            else if (exponent ==  2.0) {
-                fastop = n_ops.square;
-            }
-            else {
-                return -1;
-            }
+    PyArrayObject *a1 = (PyArrayObject *)o1;
+    if (!(PyArray_ISFLOAT(a1) || PyArray_ISCOMPLEX(a1))) {
+        return 1;
+    }
 
-            if (inplace || can_elide_temp_unary(a1)) {
-                *value = PyArray_GenericInplaceUnaryFunction(a1, fastop);
-            }
-            else {
-                *value = PyArray_GenericUnaryFunction(a1, fastop);
-            }
-            return 0;
-        }
-        /* Because this is called with all arrays, we need to
-         *  change the output if the kind of the scalar is different
-         *  than that of the input and inplace is not on ---
-         *  (thus, the input should be up-cast)
-         */
-        else if (exponent == 2.0) {
-            fastop = n_ops.square;
-            if (inplace) {
-                *value = PyArray_GenericInplaceUnaryFunction(a1, fastop);
-            }
-            else {
-                /* We only special-case the FLOAT_SCALAR and integer types */
-                if (kind == NPY_FLOAT_SCALAR && PyArray_ISINTEGER(a1)) {
-                    PyArray_Descr *dtype = PyArray_DescrFromType(NPY_DOUBLE);
-                    a1 = (PyArrayObject *)PyArray_CastToType(a1, dtype,
-                            PyArray_ISFORTRAN(a1));
-                    if (a1 != NULL) {
-                        /* cast always creates a new array */
-                        *value = PyArray_GenericInplaceUnaryFunction(a1, fastop);
-                        Py_DECREF(a1);
-                    }
-                }
-                else {
-                    *value = PyArray_GenericUnaryFunction(a1, fastop);
-                }
-            }
-            return 0;
-        }
+    if (inplace || can_elide_temp_unary(a1)) {
+        *result = PyArray_GenericInplaceUnaryFunction(a1, fastop);
     }
-    /* no fast operation found */
-    return -1;
+    else {
+        *result = PyArray_GenericUnaryFunction(a1, fastop);
+    }
+
+    return 0;
 }
 
 static PyObject *
@@ -643,7 +531,8 @@ array_inplace_power(PyArrayObject *a1, PyObject *o2, PyObject *NPY_UNUSED(modulo
 
     INPLACE_GIVE_UP_IF_NEEDED(
             a1, o2, nb_inplace_power, array_inplace_power);
-    if (fast_scalar_power((PyObject *)a1, o2, 1, &value) != 0) {
+
+    if (fast_scalar_power((PyObject *) a1, o2, 1, &value) != 0) {
         value = PyArray_GenericInplaceBinaryFunction(a1, o2, n_ops.power);
     }
     return value;
diff --git a/numpy/_core/src/umath/loops.c.src b/numpy/_core/src/umath/loops.c.src
@@ -486,28 +486,54 @@ _@TYPE@_squared_exponentiation_helper(@type@ base, @type@ exponent_two, int firs
    return out;
 }
 
+static inline @type@
+_@TYPE@_power_fast_path_helper(@type@ in1, @type@ in2, @type@ *op1) {
+    // Trivial cases: store the result in *op1 and return 0; return 1 if none applies
+    if (in2 == 0 || in1 == 1) {
+        *op1 = 1;
+    }
+    else if (in2 == 1) {
+        *op1 = in1;
+    }
+    else if (in2 == 2) {
+        *op1 = in1 * in1;
+    }
+    else {
+        return 1;
+    }
+    return 0;
+}
+
+
 NPY_NO_EXPORT void
 @TYPE@_power(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
     if (steps[1]==0) {
         // stride for second argument is 0
         BINARY_DEFS
         const @type@ in2 = *(@type@ *)ip2;
-        #if @SIGNED@
-            if (in2 < 0) {
-                npy_gil_error(PyExc_ValueError,
-                              "Integers to negative integer powers are not allowed.");
-                return;
-            }
-        #endif
+
+#if @SIGNED@
+        if (in2 < 0) {
+            npy_gil_error(PyExc_ValueError,
+                            "Integers to negative integer powers are not allowed.");
+            return;
+        }
+#endif
 
         int first_bit = in2 & 1;
         @type@ in2start = in2 >> 1;
 
+        int fastop_exists = (in2 == 0) || (in2 == 1) || (in2 == 2);
+        
         BINARY_LOOP_SLIDING {
             @type@ in1 = *(@type@ *)ip1;
-
-            *((@type@ *) op1) = _@TYPE@_squared_exponentiation_helper(in1, in2start, first_bit);
+            if (fastop_exists) {
+                _@TYPE@_power_fast_path_helper(in1, in2, (@type@ *)op1);
+            }
+            else {
+                *((@type@ *) op1) = _@TYPE@_squared_exponentiation_helper(in1, in2start, first_bit);
+            }
         }
         return;
     }
@@ -518,22 +544,16 @@ NPY_NO_EXPORT void
 #if @SIGNED@
         if (in2 < 0) {
             npy_gil_error(PyExc_ValueError,
-                          "Integers to negative integer powers are not allowed.");
+                            "Integers to negative integer powers are not allowed.");
             return;
         }
 #endif
-        if (in2 == 0) {
-            *((@type@ *)op1) = 1;
-            continue;
-        }
-        if (in1 == 1) {
-            *((@type@ *)op1) = 1;
-            continue;
-        }
 
-        int first_bit = in2 & 1;
-        in2 >>= 1;
-        *((@type@ *) op1) = _@TYPE@_squared_exponentiation_helper(in1, in2, first_bit);
+        if (_@TYPE@_power_fast_path_helper(in1, in2, (@type@ *)op1) != 0) {
+            int first_bit = in2 & 1;
+            in2 >>= 1;
+            *((@type@ *) op1) = _@TYPE@_squared_exponentiation_helper(in1, in2, first_bit);
+        }
     }
 }
 /**end repeat**/
diff --git a/numpy/_core/src/umath/loops_umath_fp.dispatch.c.src b/numpy/_core/src/umath/loops_umath_fp.dispatch.c.src
@@ -239,11 +239,30 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
     if (stride_zero) {
         BINARY_DEFS
         const @type@ in2 = *(@type@ *)ip2;
-        if (in2 == 2.0) {
-            BINARY_LOOP_SLIDING {
-                const @type@ in1 = *(@type@ *)ip1;
+        int fastop_found = 1;
+        BINARY_LOOP_SLIDING {
+            const @type@ in1 = *(@type@ *)ip1;
+            if (in2 == -1.0) {
+                *(@type@ *)op1 = 1.0 / in1;
+            }
+            else if (in2 == 0.0) {
+                *(@type@ *)op1 = 1.0;
+            }
+            else if (in2 == 0.5) {
+                *(@type@ *)op1 = @sqrt@(in1);
+            }
+            else if (in2 == 1.0) {
+                *(@type@ *)op1 = in1;
+            }
+            else if (in2 == 2.0) {
                 *(@type@ *)op1 = in1 * in1;
             }
+            else {
+                fastop_found = 0;
+                break;
+            }
+        }
+        if (fastop_found) {
             return;
         }
     }
diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py
@@ -4125,27 +4125,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kw):
         assert_equal(A[0], 30)
         assert_(isinstance(A, OutClass))
 
-    def test_pow_override_with_errors(self):
-        # regression test for gh-9112
-        class PowerOnly(np.ndarray):
-            def __array_ufunc__(self, ufunc, method, *inputs, **kw):
-                if ufunc is not np.power:
-                    raise NotImplementedError
-                return "POWER!"
-        # explicit cast to float, to ensure the fast power path is taken.
-        a = np.array(5., dtype=np.float64).view(PowerOnly)
-        assert_equal(a ** 2.5, "POWER!")
-        with assert_raises(NotImplementedError):
-            a ** 0.5
-        with assert_raises(NotImplementedError):
-            a ** 0
-        with assert_raises(NotImplementedError):
-            a ** 1
-        with assert_raises(NotImplementedError):
-            a ** -1
-        with assert_raises(NotImplementedError):
-            a ** 2
-
     def test_pow_array_object_dtype(self):
         # test pow on arrays of object dtype
         class SomeClass: