Skip to content

Commit 47f5fdf

Browse files
API: concatting DataFrames does not skip empty objects (#39035)
* Revert "BUG: casting on concat with empties (#38907)" This reverts commit 04282c7.
* Revert "BUG: inconsistent concat casting EA vs non-EA (#38843)" This reverts commit 2362df9.
1 parent 6259b5a commit 47f5fdf

File tree

7 files changed

+16
-32
lines changed

7 files changed

+16
-32
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -312,8 +312,8 @@ Reshaping
312312
- Bug in :func:`merge` raising error when performing an inner join with partial index and ``right_index`` when no overlap between indices (:issue:`33814`)
313313
- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
314314
- Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`)
315-
- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`)
316315
- :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
316+
-
317317

318318
Sparse
319319
^^^^^^

pandas/core/dtypes/concat.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@ def is_nonempty(x) -> bool:
127127
# marginal given that it would still require shape & dtype calculation and
128128
# np.concatenate which has them both implemented is compiled.
129129
non_empties = [x for x in to_concat if is_nonempty(x)]
130-
if non_empties:
130+
if non_empties and axis == 0:
131131
to_concat = non_empties
132132

133133
typs = _get_dtype_kinds(to_concat)

pandas/core/internals/concat.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -318,12 +318,6 @@ def _concatenate_join_units(
318318
# Concatenating join units along ax0 is handled in _merge_blocks.
319319
raise AssertionError("Concatenating join units along axis0")
320320

321-
nonempties = [
322-
x for x in join_units if x.block is None or x.block.shape[concat_axis] > 0
323-
]
324-
if nonempties:
325-
join_units = nonempties
326-
327321
empty_dtype, upcasted_na = _get_empty_dtype_and_na(join_units)
328322

329323
to_concat = [

pandas/tests/indexing/test_partial.py

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -154,8 +154,7 @@ def test_partial_setting_mixed_dtype(self):
154154
# columns will align
155155
df = DataFrame(columns=["A", "B"])
156156
df.loc[0] = Series(1, index=range(4))
157-
expected = DataFrame(columns=["A", "B"], index=[0], dtype=int)
158-
tm.assert_frame_equal(df, expected)
157+
tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0]))
159158

160159
# columns will align
161160
df = DataFrame(columns=["A", "B"])
@@ -171,21 +170,11 @@ def test_partial_setting_mixed_dtype(self):
171170
with pytest.raises(ValueError, match=msg):
172171
df.loc[0] = [1, 2, 3]
173172

174-
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
175-
def test_loc_setitem_expanding_empty(self, dtype):
173+
# TODO: #15657, these are left as object and not coerced
176174
df = DataFrame(columns=["A", "B"])
175+
df.loc[3] = [6, 7]
177176

178-
value = [6, 7]
179-
if dtype == "int64":
180-
value = np.array(value, dtype=dtype)
181-
elif dtype == "Int64":
182-
value = pd.array(value, dtype=dtype)
183-
184-
df.loc[3] = value
185-
186-
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype)
187-
if dtype is not None:
188-
exp = exp.astype(dtype)
177+
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
189178
tm.assert_frame_equal(df, exp)
190179

191180
def test_series_partial_set(self):

pandas/tests/reshape/concat/test_append.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@ def test_append_length0_frame(self, sort):
8282
df5 = df.append(df3, sort=sort)
8383

8484
expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
85-
expected["C"] = expected["C"].astype(np.float64)
8685
tm.assert_frame_equal(df5, expected)
8786

8887
def test_append_records(self):
@@ -341,11 +340,16 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
341340
expected = DataFrame(
342341
[[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
343342
)
343+
# These columns get cast to object after append
344+
expected["c"] = expected["c"].astype(object)
345+
expected["d"] = expected["d"].astype(object)
344346
tm.assert_frame_equal(result_a, expected)
345347

346348
expected = DataFrame(
347349
[[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
348350
)
351+
expected["c"] = expected["c"].astype(object)
352+
expected["d"] = expected["d"].astype(object)
349353

350354
result_b = result_a.append(s, ignore_index=True)
351355
tm.assert_frame_equal(result_b, expected)

pandas/tests/reshape/concat/test_concat.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -474,12 +474,11 @@ def test_concat_will_upcast(dt, pdt):
474474
assert x.values.dtype == "float64"
475475

476476

477-
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
478-
def test_concat_empty_and_non_empty_frame_regression(dtype):
477+
def test_concat_empty_and_non_empty_frame_regression():
479478
# GH 18178 regression test
480-
df1 = DataFrame({"foo": [1]}).astype(dtype)
479+
df1 = DataFrame({"foo": [1]})
481480
df2 = DataFrame({"foo": []})
482-
expected = df1
481+
expected = DataFrame({"foo": [1.0]})
483482
result = pd.concat([df1, df2])
484483
tm.assert_frame_equal(result, expected)
485484

pandas/tests/reshape/concat/test_empty.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -202,14 +202,12 @@ def test_concat_empty_series_dtypes_sparse(self):
202202
expected = pd.SparseDtype("object")
203203
assert result.dtype == expected
204204

205-
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
206-
def test_concat_empty_df_object_dtype(self, dtype):
205+
def test_concat_empty_df_object_dtype(self):
207206
# GH 9149
208207
df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
209-
df_1["Row"] = df_1["Row"].astype(dtype)
210208
df_2 = DataFrame(columns=df_1.columns)
211209
result = pd.concat([df_1, df_2], axis=0)
212-
expected = df_1.copy()
210+
expected = df_1.astype(object)
213211
tm.assert_frame_equal(result, expected)
214212

215213
def test_concat_empty_dataframe_dtypes(self):

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy