Skip to content

DEPR: concat ignoring empty objects #52532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into main from depr-concat-empty
Jul 10, 2023
Merged
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
63292d4
DEPR: concat with empty objects
jbrockmendel Apr 7, 2023
2ace79c
xfail on 32bit
jbrockmendel Apr 8, 2023
6258adf
missing reason
jbrockmendel Apr 8, 2023
bfd969f
Merge branch 'main' into depr-concat-empty
jbrockmendel Apr 10, 2023
51e6d36
Fix AM build
jbrockmendel Apr 10, 2023
52ce0d7
post-merge fixup
jbrockmendel Apr 10, 2023
f8dc81e
Merge branch 'main' into depr-concat-empty
jbrockmendel Apr 11, 2023
163bf8a
catch more specifically
jbrockmendel Apr 11, 2023
03a0641
un-xfail
jbrockmendel Apr 12, 2023
49a7146
Merge branch 'main' into depr-concat-empty
jbrockmendel Apr 12, 2023
7e2e995
mypy fixup
jbrockmendel Apr 12, 2023
7c0c715
Merge branch 'main' into depr-concat-empty
jbrockmendel Apr 13, 2023
7f2977a
Merge branch 'main' into depr-concat-empty
jbrockmendel Apr 13, 2023
0eaf359
Merge branch 'main' into depr-concat-empty
jbrockmendel Apr 17, 2023
a878fea
Merge branch 'main' into depr-concat-empty
jbrockmendel Apr 18, 2023
75d5041
update test
jbrockmendel Apr 18, 2023
9e2de8f
Merge branch 'main' into depr-concat-empty
jbrockmendel May 4, 2023
392b40a
Fix broken test
jbrockmendel May 4, 2023
465c141
Merge branch 'main' into depr-concat-empty
jbrockmendel May 16, 2023
3666bca
remove duplicate whatsnew entries
jbrockmendel May 16, 2023
390d4ef
Merge branch 'main' into depr-concat-empty
jbrockmendel May 22, 2023
aa5794f
Merge branch 'main' into depr-concat-empty
jbrockmendel May 23, 2023
1277b26
Merge branch 'main' into depr-concat-empty
jbrockmendel May 24, 2023
5cddae9
Merge branch 'main' into depr-concat-empty
jbrockmendel May 24, 2023
8e58bff
Merge branch 'main' into depr-concat-empty
jbrockmendel May 25, 2023
47a17b3
Merge branch 'main' into depr-concat-empty
jbrockmendel May 25, 2023
e696c53
remove unused
jbrockmendel May 25, 2023
7f07121
Merge branch 'main' into depr-concat-empty
jbrockmendel May 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
catch more specifically
  • Loading branch information
jbrockmendel committed Apr 11, 2023
commit 163bf8a9e62c75b5d416c03fb51990ee0063c13b
128 changes: 39 additions & 89 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,9 @@
find_common_type,
)
from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.generic import (
ABCCategoricalIndex,
ABCExtensionArray,
ABCSeries,
)

Expand Down Expand Up @@ -106,15 +101,17 @@ def concat_compat(
# Creating an empty array directly is tempting, but the winnings would be
# marginal given that it would still require shape & dtype calculation and
# np.concatenate which has them both implemented is compiled.
orig = to_concat
non_empties = [x for x in to_concat if _is_nonempty(x, axis)]
if non_empties and axis == 0 and not ea_compat_axis:
# ea_compat_axis see GH#39574
if len(non_empties) < len(to_concat) and not any(
obj.dtype == _dtype_obj for obj in non_empties
):
# Check for object dtype is an imperfect proxy for checking if
# the result dtype is going to change once the deprecation is
# enforced.
to_concat = non_empties

any_ea, kinds, target_dtype = _get_result_dtype(to_concat, non_empties)

if len(to_concat) < len(orig):
_, _, alt_dtype = _get_result_dtype(orig, non_empties)
if alt_dtype != target_dtype:
# GH#39122
warnings.warn(
"The behavior of array concatenation with empty entries is "
Expand All @@ -125,42 +122,42 @@ def concat_compat(
FutureWarning,
stacklevel=find_stack_level(),
)
to_concat = non_empties

dtypes = {obj.dtype for obj in to_concat}
kinds = {obj.dtype.kind for obj in to_concat}
contains_datetime = any(
isinstance(dtype, (np.dtype, DatetimeTZDtype)) and dtype.kind in "mM"
for dtype in dtypes
) or any(isinstance(obj, ABCExtensionArray) and obj.ndim > 1 for obj in to_concat)
if target_dtype is not None:
to_concat = [astype_array(arr, target_dtype, copy=False) for arr in to_concat]

all_empty = not len(non_empties)
single_dtype = len(dtypes) == 1
any_ea = any(isinstance(x, ExtensionDtype) for x in dtypes)
if not isinstance(to_concat[0], np.ndarray):
# i.e. isinstance(to_concat[0], ExtensionArray)
to_concat_eas = cast("Sequence[ExtensionArray]", to_concat)
cls = type(to_concat[0])
return cls._concat_same_type(to_concat_eas)
else:
to_concat_arrs = cast("Sequence[np.ndarray]", to_concat)
result = np.concatenate(to_concat_arrs, axis=axis)

if not any_ea and "b" in kinds and result.dtype.kind in "iuf":
# GH#39817 cast to object instead of casting bools to numeric
result = result.astype(object, copy=False)
return result

if contains_datetime:
return _concat_datetime(to_concat, axis=axis)

def _get_result_dtype(to_concat: Sequence[ArrayLike], non_empties: Sequence[ArrayLike]):
target_dtype = None

dtypes = {obj.dtype for obj in to_concat}
kinds = {obj.dtype.kind for obj in to_concat}

any_ea = any(not isinstance(x, np.ndarray) for x in to_concat)
if any_ea:
# i.e. any ExtensionArrays

# we ignore axis here, as internally concatting with EAs is always
# for axis=0
if not single_dtype:
if len(dtypes) != 1:
target_dtype = find_common_type([x.dtype for x in to_concat])
target_dtype = common_dtype_categorical_compat(to_concat, target_dtype)
to_concat = [
astype_array(arr, target_dtype, copy=False) for arr in to_concat
]

if isinstance(to_concat[0], ABCExtensionArray):
# TODO: what about EA-backed Index?
to_concat_eas = cast("Sequence[ExtensionArray]", to_concat)
cls = type(to_concat[0])
return cls._concat_same_type(to_concat_eas)
else:
to_concat_arrs = cast("Sequence[np.ndarray]", to_concat)
return np.concatenate(to_concat_arrs)

elif all_empty:
elif not len(non_empties):
# we have all empties, but may need to coerce the result dtype to
# object if we have non-numeric type operands (numpy would otherwise
# cast this to float)
Expand All @@ -170,17 +167,12 @@ def concat_compat(
pass
else:
# coerce to object
to_concat = [x.astype("object") for x in to_concat]
target_dtype = np.dtype(object)
kinds = {"o"}
else:
target_dtype = np.find_common_type(list(dtypes), [])

# error: Argument 1 to "concatenate" has incompatible type
# "Sequence[Union[ExtensionArray, ndarray[Any, Any]]]"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[Any]]]]"
result: np.ndarray = np.concatenate(to_concat, axis=axis) # type: ignore[arg-type]
if "b" in kinds and result.dtype.kind in "iuf":
# GH#39817 cast to object instead of casting bools to numeric
result = result.astype(object, copy=False)
return result
return any_ea, kinds, target_dtype


def union_categoricals(
Expand Down Expand Up @@ -347,45 +339,3 @@ def _maybe_unwrap(x):

dtype = CategoricalDtype(categories=categories, ordered=ordered)
return Categorical._simple_new(new_codes, dtype=dtype)


def _concatenate_2d(to_concat: Sequence[np.ndarray], axis: AxisInt) -> np.ndarray:
    """
    Concatenate ndarrays along ``axis``, coercing the inputs to 2D if needed.

    Parameters
    ----------
    to_concat : sequence of np.ndarray
        Arrays to join.
    axis : int
        Axis passed through to ``np.concatenate``.

    Returns
    -------
    np.ndarray
    """
    if axis == 1:
        # A 1D array has no axis 1; np.atleast_2d presents it as a single
        # row so np.concatenate can join along axis=1.
        to_concat = [np.atleast_2d(x) for x in to_concat]
    return np.concatenate(to_concat, axis=axis)


def _concat_datetime(to_concat: Sequence[ArrayLike], axis: AxisInt = 0) -> ArrayLike:
    """
    Concatenate datetime-like arrays, each of which has a single M8[ns],
    datetime64[ns, tz] or m8[ns] dtype.

    Parameters
    ----------
    to_concat : sequence of arrays
    axis : int, default 0
        Axis along which to concatenate.

    Returns
    -------
    ArrayLike
        A single array. When all inputs share one dtype that dtype is
        preserved; otherwise the inputs are cast to object before joining.
    """
    from pandas.core.construction import ensure_wrapped_if_datetimelike

    to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat]

    single_dtype = lib.dtypes_all_equal([x.dtype for x in to_concat])

    # multiple types, need to coerce to object
    if not single_dtype:
        # ensure_wrapped_if_datetimelike ensures that astype(object) wraps
        # in Timestamp/Timedelta
        return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)

    # error: Unexpected keyword argument "axis" for "_concat_same_type" of
    # "ExtensionArray"
    to_concat_eas = cast("list[ExtensionArray]", to_concat)
    result = type(to_concat_eas[0])._concat_same_type(  # type: ignore[call-arg]
        to_concat_eas, axis=axis
    )
    return result
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy