Skip to content

BUG: allow MaskedArray.fill_value be a string when dtype=StringDType #29423

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/release/upcoming_changes/29423.new_feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
``StringDType`` fill_value support in `numpy.ma.MaskedArray`
------------------------------------------------------------
Masked arrays now accept and preserve a Python ``str`` as their ``fill_value`` when
using the variable-width ``StringDType`` (kind ``'T'``), including through slicing
and views. The default is ``'N/A'`` and may be overridden by any valid string.
This fixes issue `gh-29421 <https://github.com/numpy/numpy/issues/29421>`__ and was
implemented in pull request `gh-29423 <https://github.com/numpy/numpy/pull/29423>`__.
26 changes: 14 additions & 12 deletions numpy/ma/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,8 @@ class MaskError(MAError):
'S': b'N/A',
'u': 999999,
'V': b'???',
'U': 'N/A'
'U': 'N/A',
'T': 'N/A'
}

# Add datetime64 and timedelta64 types
Expand Down Expand Up @@ -264,16 +265,17 @@ def default_fill_value(obj):
The default filling value depends on the datatype of the input
array or the type of the input scalar:

======== ========
datatype default
======== ========
bool True
int 999999
float 1.e20
complex 1.e20+0j
object '?'
string 'N/A'
======== ========
=========== ========
datatype default
=========== ========
bool True
int 999999
float 1.e20
complex 1.e20+0j
object '?'
string 'N/A'
StringDType 'N/A'
=========== ========

For structured types, a structured scalar is returned, with each field the
default fill value for its type.
Expand Down Expand Up @@ -498,7 +500,7 @@ def _check_fill_value(fill_value, ndtype):
fill_value = np.asarray(fill_value, dtype=object)
fill_value = np.array(_recursive_set_fill_value(fill_value, ndtype),
dtype=ndtype)
elif isinstance(fill_value, str) and (ndtype.char not in 'OSVU'):
elif isinstance(fill_value, str) and (ndtype.char not in 'OSTVU'):
# Note this check doesn't work if fill_value is not a scalar
err_msg = "Cannot set fill value of string with array of dtype %s"
raise TypeError(err_msg % ndtype)
Expand Down
77 changes: 72 additions & 5 deletions numpy/ma/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1807,7 +1807,7 @@ def test_eq_ne_structured_extra(self):
el_by_el = [m1[name] != m2[name] for name in dt.names]
assert_equal(array(el_by_el, dtype=bool).any(), ne_expected)

@pytest.mark.parametrize('dt', ['S', 'U'])
@pytest.mark.parametrize('dt', ['S', 'U', 'T'])
@pytest.mark.parametrize('fill', [None, 'A'])
def test_eq_for_strings(self, dt, fill):
# Test the equality of structured arrays
Expand Down Expand Up @@ -1839,7 +1839,7 @@ def test_eq_for_strings(self, dt, fill):
assert_equal(test.mask, [True, False])
assert_(test.fill_value == True)

@pytest.mark.parametrize('dt', ['S', 'U'])
@pytest.mark.parametrize('dt', ['S', 'U', 'T'])
@pytest.mark.parametrize('fill', [None, 'A'])
def test_ne_for_strings(self, dt, fill):
# Test the equality of structured arrays
Expand Down Expand Up @@ -1989,15 +1989,23 @@ def test_comparisons_for_numeric(self, op, dt1, dt2, fill):
assert_equal(test.mask, [True, False])
assert_(test.fill_value == True)

@pytest.mark.parametrize('dt', ['S', 'U', 'T'])
@pytest.mark.parametrize('op',
[operator.le, operator.lt, operator.ge, operator.gt])
@pytest.mark.parametrize('fill', [None, "N/A"])
def test_comparisons_strings(self, op, fill):
def test_comparisons_strings(self, dt, op, fill):
# See gh-21770, mask propagation is broken for strings (and some other
# cases) so we explicitly test strings here.
# In principle only == and != may need special handling...
ma1 = masked_array(["a", "b", "cde"], mask=[0, 1, 0], fill_value=fill)
ma2 = masked_array(["cde", "b", "a"], mask=[0, 1, 0], fill_value=fill)
ma1 = masked_array(["a", "b", "cde"], mask=[0, 1, 0], fill_value=fill, dtype=dt)
ma2 = masked_array(["cde", "b", "a"], mask=[0, 1, 0], fill_value=fill, dtype=dt)
assert_equal(op(ma1, ma2)._data, op(ma1._data, ma2._data))

if isinstance(fill, str):
fill = np.array(fill, dtype=dt)

ma1 = masked_array(["a", "b", "cde"], mask=[0, 1, 0], fill_value=fill, dtype=dt)
ma2 = masked_array(["cde", "b", "a"], mask=[0, 1, 0], fill_value=fill, dtype=dt)
assert_equal(op(ma1, ma2)._data, op(ma1._data, ma2._data))

@pytest.mark.filterwarnings("ignore:.*Comparison to `None`.*:FutureWarning")
Expand Down Expand Up @@ -5689,6 +5697,65 @@ def test_default_fill_value_complex():
assert_(default_fill_value(1 + 1j) == 1.e20 + 0.0j)


def test_string_dtype_fill_value_on_construction():
# Regression test for gh-29421: allow string fill_value on StringDType masked arrays
dt = np.dtypes.StringDType()
data = np.array(["A", "test", "variable", ""], dtype=dt)
mask = [True, False, True, True]
# Prior to the fix, this would TypeError; now it should succeed
arr = np.ma.MaskedArray(data, mask=mask, fill_value="FILL", dtype=dt)
assert isinstance(arr.fill_value, str)
assert arr.fill_value == "FILL"
filled = arr.filled()
# Masked positions should be replaced by 'FILL'
assert filled.tolist() == ["FILL", "test", "FILL", "FILL"]


def test_string_dtype_default_fill_value():
# Regression test for gh-29421: default fill_value for StringDType is 'N/A'
dt = np.dtypes.StringDType()
data = np.array(['x', 'y', 'z'], dtype=dt)
# no fill_value passed → uses default_fill_value internally
arr = np.ma.MaskedArray(data, mask=[True, False, True], dtype=dt)
# ensure it’s stored as a Python str and equals the expected default
assert isinstance(arr.fill_value, str)
assert arr.fill_value == 'N/A'
# masked slots should be replaced by that default
assert arr.filled().tolist() == ['N/A', 'y', 'N/A']


def test_string_dtype_fill_value_persists_through_slice():
# Regression test for gh-29421: .fill_value survives slicing/viewing
dt = np.dtypes.StringDType()
arr = np.ma.MaskedArray(
['a', 'b', 'c'],
mask=[True, False, True],
dtype=dt
)
arr.fill_value = 'Z'
# slice triggers __array_finalize__
sub = arr[1:]
# the slice should carry the same fill_value and behavior
assert isinstance(sub.fill_value, str)
assert sub.fill_value == 'Z'
assert sub.filled().tolist() == ['b', 'Z']


def test_setting_fill_value_attribute():
# Regression test for gh-29421: setting .fill_value post-construction works too
dt = np.dtypes.StringDType()
arr = np.ma.MaskedArray(
["x", "longstring", "mid"], mask=[False, True, False], dtype=dt
)
# Setting the attribute should not raise
arr.fill_value = "Z"
assert arr.fill_value == "Z"
# And filled() should use the new fill_value
assert arr.filled()[0] == "x"
assert arr.filled()[1] == "Z"
assert arr.filled()[2] == "mid"


def test_ufunc_with_output():
# check that giving an output argument always returns that output.
# Regression test for gh-8416.
Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy