From 6766c8de1757353676b757ccc4822d115dae6cb9 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 1 Jun 2025 20:06:09 +0200 Subject: [PATCH 1/5] BUG: Allow np.percentile to operate on float16 data --- numpy/lib/_function_base_impl.py | 27 ++++++++------ numpy/lib/tests/test_function_base.py | 54 ++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/numpy/lib/_function_base_impl.py b/numpy/lib/_function_base_impl.py index 63346088b6e2..3dd6dd61b15f 100644 --- a/numpy/lib/_function_base_impl.py +++ b/numpy/lib/_function_base_impl.py @@ -124,7 +124,7 @@ # `_compute_virtual_index(n, quantiles, 1, 1)`. # They are mathematically equivalent. 'linear': { - 'get_virtual_index': lambda n, quantiles: (n - 1) * quantiles, + 'get_virtual_index': lambda n, quantiles: (n - np.int64(1)) * quantiles, 'fix_gamma': lambda gamma, _: gamma, }, 'median_unbiased': { @@ -4270,9 +4270,7 @@ def percentile(a, if a.dtype.kind == "c": raise TypeError("a must be an array of real numbers") - # Use dtype of array if possible (e.g., if q is a python int or float) - # by making the divisor have the dtype of the data array. - q = np.true_divide(q, a.dtype.type(100) if a.dtype.kind == "f" else 100, out=...) + q = np.true_divide(q, 100, out=...) if not _quantile_is_valid(q): raise ValueError("Percentiles must be in the range [0, 100]") @@ -4531,11 +4529,7 @@ def quantile(a, if a.dtype.kind == "c": raise TypeError("a must be an array of real numbers") - # Use dtype of array if possible (e.g., if q is a python int or float). - if isinstance(q, (int, float)) and a.dtype.kind == "f": - q = np.asanyarray(q, dtype=a.dtype) - else: - q = np.asanyarray(q) + q = np.asanyarray(q) if not _quantile_is_valid(q): raise ValueError("Quantiles must be in the range [0, 1]") @@ -4628,7 +4622,7 @@ def _compute_virtual_index(n, quantiles, alpha: float, beta: float): ) - 1 -def _get_gamma(virtual_indexes, previous_indexes, method): +def _get_gamma(virtual_indexes, previous_indexes, method, dtype=None): """ Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation of quantiles. @@ -4649,7 +4643,7 @@ def _get_gamma(virtual_indexes, previous_indexes, method): gamma = method["fix_gamma"](gamma, virtual_indexes) # Ensure both that we have an array, and that we keep the dtype # (which may have been matched to the input array). - return np.asanyarray(gamma, dtype=virtual_indexes.dtype) + return np.asanyarray(gamma, dtype=dtype) def _lerp(a, b, t, out=None): @@ -4868,7 +4862,16 @@ def _quantile( previous = arr[previous_indexes] next = arr[next_indexes] # --- Linear interpolation - gamma = _get_gamma(virtual_indexes, previous_indexes, method_props) + if arr.dtype.kind in "iu": + gtype = None + elif arr.dtype.kind == "f": + # make sure the return value matches the input array type + gtype = arr.dtype + else: + gtype = virtual_indexes.dtype + + gamma = _get_gamma(virtual_indexes, previous_indexes, + method_props, gtype) result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1) gamma = gamma.reshape(result_shape) result = _lerp(previous, diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 50c61e6e04fa..71a3f545fdc9 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -3227,6 +3227,16 @@ def test_period(self): assert_almost_equal(np.interp(x, xp, fp, period=360), y) +quantile_methods = [ + 'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation', + 'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear', + 'median_unbiased', 'normal_unbiased', 'nearest', 'lower', 'higher', + 'midpoint'] + + +methods_supporting_weights = ["inverted_cdf"] + + class TestPercentile: def test_basic(self): @@ -3820,15 +3830,32 @@ def test_nat_basic(self, dtype, pos): res = np.percentile(a, 30, axis=0) assert_array_equal(np.isnat(res), [False, True, False]) - -quantile_methods = [ - 'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation', - 'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear', - 'median_unbiased', 'normal_unbiased', 'nearest', 'lower', 'higher', - 'midpoint'] - - -methods_supporting_weights = ["inverted_cdf"] + @pytest.mark.parametrize("qtype", [np.float16, np.float32]) + @pytest.mark.parametrize("method", quantile_methods) + def test_percentile_gh_29003(self, qtype, method): + zero = qtype(0) + one = qtype(0) + data = [zero] * 65521 + a = np.array(data) + a[:20_000] = one + z = np.percentile(a, 50, method=method) + assert z == zero + assert z.dtype == a.dtype + z = np.percentile(a, .9, method=method) + assert z == one + assert z.dtype == a.dtype + + def test_percentile_gh_29003_Fraction(self): + zero = Fraction(0) + one = Fraction(0) + data = [zero] * 65521 + a = np.array(data) + a[:20_000] = one + z = np.percentile(a, 50) + assert z == zero + z = np.percentile(a, Fraction(50)) + assert z == zero + assert np.array(z).dtype == a.dtype class TestQuantile: @@ -4194,6 +4221,15 @@ def test_closest_observation(self): assert_equal(4, np.quantile(arr[0:9], q, method=m)) assert_equal(5, np.quantile(arr, q, method=m)) + def test_quantile_gh_29003_Fraction(self): + r = np.quantile([1, 2], q=Fraction(1)) + assert r == Fraction(2) + assert isinstance(r, Fraction) + + r = np.quantile([1, 2], q=Fraction(.5)) + assert r == Fraction(3, 2) + assert isinstance(r, Fraction) + class TestLerp: @hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False, From 2c59bea1bac1b424035a74fa6dc73ccd61618777 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 1 Jun 2025 20:54:03 +0200 Subject: [PATCH 2/5] add an extra regression test --- numpy/lib/tests/test_function_base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 71a3f545fdc9..43387719eb5c 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -4230,6 +4230,13 @@ def test_quantile_gh_29003_Fraction(self): assert r == Fraction(3, 2) assert isinstance(r, Fraction) + def test_float16_gh_29003(self): + a = np.arange(50_001, dtype = np.float16) + q = .999 + value = np.quantile(a, q) + assert value == q * 50_000 + + class TestLerp: @hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False, From 5a7ca195fce5939a3302131e4f3e930103c7c03b Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 1 Jun 2025 20:54:43 +0200 Subject: [PATCH 3/5] add an extra regression test --- numpy/lib/tests/test_function_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 43387719eb5c..60cc089442eb 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -4231,13 +4231,12 @@ def test_quantile_gh_29003_Fraction(self): assert isinstance(r, Fraction) def test_float16_gh_29003(self): - a = np.arange(50_001, dtype = np.float16) + a = np.arange(50_001, dtype=np.float16) q = .999 value = np.quantile(a, q) assert value == q * 50_000 - class TestLerp: @hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False, min_value=0, max_value=1), From 6f982e58878872f902fbc1959f4a19ec9ff750c0 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 1 Jun 2025 20:56:11 +0200 Subject: [PATCH 4/5] remove unused default value --- numpy/lib/_function_base_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/lib/_function_base_impl.py b/numpy/lib/_function_base_impl.py index 3dd6dd61b15f..84aecc59ae9e 100644 --- a/numpy/lib/_function_base_impl.py +++ b/numpy/lib/_function_base_impl.py @@ -4622,7 +4622,7 @@ def _compute_virtual_index(n, quantiles, alpha: float, beta: float): ) - 1 -def _get_gamma(virtual_indexes, previous_indexes, method, dtype=None): +def _get_gamma(virtual_indexes, previous_indexes, method, dtype): """ Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation of quantiles. From b1d973d93400428d5f7d4e1aea4c536ae321a78a Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 4 Jun 2025 23:17:43 +0200 Subject: [PATCH 5/5] add release note --- doc/release/upcoming_changes/29105.change.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release/upcoming_changes/29105.change.rst diff --git a/doc/release/upcoming_changes/29105.change.rst b/doc/release/upcoming_changes/29105.change.rst new file mode 100644 index 000000000000..b5d4a9838f30 --- /dev/null +++ b/doc/release/upcoming_changes/29105.change.rst @@ -0,0 +1 @@ +* The accuracy of ``np.quantile`` and ``np.percentile`` for 16- and 32-bit floating point input data has been improved. \ No newline at end of file pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy