Skip to content

BUG: Allow np.percentile to operate on float16 data #29105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/release/upcoming_changes/29105.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* The accuracy of ``np.quantile`` and ``np.percentile`` for 16- and 32-bit floating point input data has been improved.
27 changes: 15 additions & 12 deletions numpy/lib/_function_base_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@
# `_compute_virtual_index(n, quantiles, 1, 1)`.
# They are mathematically equivalent.
'linear': {
'get_virtual_index': lambda n, quantiles: (n - 1) * quantiles,
'get_virtual_index': lambda n, quantiles: (n - np.int64(1)) * quantiles,
'fix_gamma': lambda gamma, _: gamma,
},
'median_unbiased': {
Expand Down Expand Up @@ -4270,9 +4270,7 @@ def percentile(a,
if a.dtype.kind == "c":
raise TypeError("a must be an array of real numbers")

# Use dtype of array if possible (e.g., if q is a python int or float)
# by making the divisor have the dtype of the data array.
q = np.true_divide(q, a.dtype.type(100) if a.dtype.kind == "f" else 100, out=...)
q = np.true_divide(q, 100, out=...)
if not _quantile_is_valid(q):
raise ValueError("Percentiles must be in the range [0, 100]")

Expand Down Expand Up @@ -4531,11 +4529,7 @@ def quantile(a,
if a.dtype.kind == "c":
raise TypeError("a must be an array of real numbers")

# Use dtype of array if possible (e.g., if q is a python int or float).
if isinstance(q, (int, float)) and a.dtype.kind == "f":
q = np.asanyarray(q, dtype=a.dtype)
else:
q = np.asanyarray(q)
q = np.asanyarray(q)

if not _quantile_is_valid(q):
raise ValueError("Quantiles must be in the range [0, 1]")
Expand Down Expand Up @@ -4628,7 +4622,7 @@ def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
) - 1


def _get_gamma(virtual_indexes, previous_indexes, method):
def _get_gamma(virtual_indexes, previous_indexes, method, dtype):
"""
Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
of quantiles.
Expand All @@ -4649,7 +4643,7 @@ def _get_gamma(virtual_indexes, previous_indexes, method):
gamma = method["fix_gamma"](gamma, virtual_indexes)
# Ensure both that we have an array, and that we keep the dtype
# (which may have been matched to the input array).
return np.asanyarray(gamma, dtype=virtual_indexes.dtype)
return np.asanyarray(gamma, dtype=dtype)


def _lerp(a, b, t, out=None):
Expand Down Expand Up @@ -4868,7 +4862,16 @@ def _quantile(
previous = arr[previous_indexes]
next = arr[next_indexes]
# --- Linear interpolation
gamma = _get_gamma(virtual_indexes, previous_indexes, method_props)
if arr.dtype.kind in "iu":
gtype = None
elif arr.dtype.kind == "f":
# make sure the return value matches the input array type
gtype = arr.dtype
else:
gtype = virtual_indexes.dtype

gamma = _get_gamma(virtual_indexes, previous_indexes,
method_props, gtype)
result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
gamma = gamma.reshape(result_shape)
result = _lerp(previous,
Expand Down
60 changes: 51 additions & 9 deletions numpy/lib/tests/test_function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3227,6 +3227,16 @@ def test_period(self):
assert_almost_equal(np.interp(x, xp, fp, period=360), y)


# Names accepted by the ``method=`` argument of ``np.percentile``; the tests
# in this module parametrize over this list.
quantile_methods = [
    "inverted_cdf",
    "averaged_inverted_cdf",
    "closest_observation",
    "interpolated_inverted_cdf",
    "hazen",
    "weibull",
    "linear",
    "median_unbiased",
    "normal_unbiased",
    "nearest",
    "lower",
    "higher",
    "midpoint",
]


# NOTE(review): judging by the name, the subset of methods usable together
# with ``weights=`` -- confirm against the tests that consume it.
methods_supporting_weights = ["inverted_cdf"]


class TestPercentile:

def test_basic(self):
Expand Down Expand Up @@ -3820,15 +3830,32 @@ def test_nat_basic(self, dtype, pos):
res = np.percentile(a, 30, axis=0)
assert_array_equal(np.isnat(res), [False, True, False])


# Method names used to parametrize the ``method`` argument in the tests.
quantile_methods = [
'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
'median_unbiased', 'normal_unbiased', 'nearest', 'lower', 'higher',
'midpoint']


methods_supporting_weights = ["inverted_cdf"]
@pytest.mark.parametrize("qtype", [np.float16, np.float32])
@pytest.mark.parametrize("method", quantile_methods)
def test_percentile_gh_29003(self, qtype, method):
    """Regression test for gh-29003 on long float16/float32 arrays.

    The data is 20000 ones and 45521 zeros, so a percentile in the lower
    part of the sorted array must be exactly 0 and one in the upper part
    must be exactly 1, for every interpolation method.  The result must
    also keep the dtype of the input array.
    """
    zero = qtype(0)
    # Must differ from ``zero``; with two equal values the array would be
    # constant and every assertion below would pass trivially.
    one = qtype(1)
    # 65521 elements, so ``n - 1`` (65520) is larger than the largest
    # finite float16 value (65504).
    data = [zero] * 65521
    a = np.array(data)
    a[:20_000] = one

    # Sorted order is 45521 zeros followed by 20000 ones: the 50th
    # percentile lies well inside the zeros ...
    z = np.percentile(a, 50, method=method)
    assert z == zero
    assert z.dtype == a.dtype

    # ... and the 90th percentile lies well inside the ones.  (``q`` is a
    # percentage here, so ``.9`` would select the 0.9th percentile, which
    # is still a zero.)
    z = np.percentile(a, 90, method=method)
    assert z == one
    assert z.dtype == a.dtype

def test_percentile_gh_29003_Fraction(self):
    """Companion to the gh-29003 test using an object array of Fractions.

    Checks the median of a long array holding 20000 ones and 45521 zeros,
    both for a plain Python ``q`` and for a ``Fraction`` ``q``.
    """
    zero = Fraction(0)
    # Must differ from ``zero``; otherwise the array is constant and the
    # assertions below cannot detect a wrong index.
    one = Fraction(1)
    data = [zero] * 65521
    a = np.array(data)
    a[:20_000] = one

    # Sorted order is 45521 zeros followed by 20000 ones, so the median
    # falls inside the zeros.
    z = np.percentile(a, 50)
    assert z == zero

    z = np.percentile(a, Fraction(50))
    assert z == zero
    # With a Fraction ``q`` the result should stay an object, like the
    # input array.
    assert np.array(z).dtype == a.dtype


class TestQuantile:
Expand Down Expand Up @@ -4194,6 +4221,21 @@ def test_closest_observation(self):
assert_equal(4, np.quantile(arr[0:9], q, method=m))
assert_equal(5, np.quantile(arr, q, method=m))

def test_quantile_gh_29003_Fraction(self):
r = np.quantile([1, 2], q=Fraction(1))
assert r == Fraction(2)
assert isinstance(r, Fraction)

r = np.quantile([1, 2], q=Fraction(.5))
assert r == Fraction(3, 2)
assert isinstance(r, Fraction)

def test_float16_gh_29003(self):
    """Regression test for gh-29003: quantile of a long float16 ``arange``."""
    n_last = 50_000
    q = .999
    a = np.arange(n_last + 1, dtype=np.float16)
    # NOTE(review): the expected value is a Python float while the result
    # is expected to be float16; the equality presumably relies on NumPy
    # comparing at float16 precision -- confirm.
    assert np.quantile(a, q) == q * n_last


class TestLerp:
@hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False,
Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy