Skip to content

Commit ecf0847

Browse files
committed
BUG: Bug in concatenation with duplicate columns across dtypes not merging with axis=0 (GH4771)
TST: Bug in iloc with a slice index failing (GH4771)
1 parent c1ab38e commit ecf0847

File tree

5 files changed

+59
-3
lines changed

5 files changed

+59
-3
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
331331
- Bug in multi-indexing with a partial string selection as one part of a MultiIndex (:issue:`4758`)
332332
- Bug with reindexing on the index with a non-unique index will now raise ``ValueError`` (:issue:`4746`)
333333
- Bug in setting with ``loc/ix`` a single indexer with a multi-index axis and a numpy array, related to (:issue:`3777`)
334+
- Bug in concatenation with duplicate columns across dtypes not merging with axis=0 (:issue:`4771`)
335+
- Bug in ``iloc`` with a slice index failing (:issue:`4771`)
334336

335337
pandas 0.12
336338
===========

pandas/core/internals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2174,7 +2174,7 @@ def get_slice(self, slobj, axis=0, raise_on_error=False):
21742174
placement=blk._ref_locs)
21752175
new_blocks = [newb]
21762176
else:
2177-
return self.reindex_items(new_items)
2177+
return self.reindex_items(new_items, indexer=np.arange(len(self.items))[slobj])
21782178
else:
21792179
new_blocks = self._slice_blocks(slobj, axis)
21802180

pandas/tests/test_indexing.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
MultiIndex, DatetimeIndex, Timestamp)
1717
from pandas.util.testing import (assert_almost_equal, assert_series_equal,
1818
assert_frame_equal, assert_panel_equal)
19-
from pandas import compat
19+
from pandas import compat, concat
2020

2121
import pandas.util.testing as tm
2222
import pandas.lib as lib
@@ -359,6 +359,29 @@ def test_iloc_getitem_slice(self):
359359
self.check_result('slice', 'iloc', slice(1,3), 'ix', { 0 : [2,4], 1: [3,6], 2: [4,8] }, typs = ['ints'])
360360
self.check_result('slice', 'iloc', slice(1,3), 'indexer', slice(1,3), typs = ['labels','mixed','ts','floats','empty'], fails = IndexError)
361361

362+
def test_iloc_getitem_slice_dups(self):
363+
364+
df1 = DataFrame(np.random.randn(10,4),columns=['A','A','B','B'])
365+
df2 = DataFrame(np.random.randint(0,10,size=20).reshape(10,2),columns=['A','C'])
366+
367+
# axis=1
368+
df = concat([df1,df2],axis=1)
369+
assert_frame_equal(df.iloc[:,:4],df1)
370+
assert_frame_equal(df.iloc[:,4:],df2)
371+
372+
df = concat([df2,df1],axis=1)
373+
assert_frame_equal(df.iloc[:,:2],df2)
374+
assert_frame_equal(df.iloc[:,2:],df1)
375+
376+
assert_frame_equal(df.iloc[:,0:3],concat([df2,df1.iloc[:,[0]]],axis=1))
377+
378+
# axis=0
379+
df = concat([df,df],axis=0)
380+
assert_frame_equal(df.iloc[0:10,:2],df2)
381+
assert_frame_equal(df.iloc[0:10,2:],df1)
382+
assert_frame_equal(df.iloc[10:,:2],df2)
383+
assert_frame_equal(df.iloc[10:,2:],df1)
384+
362385
def test_iloc_getitem_out_of_bounds(self):
363386

364387
# out-of-bounds slice

pandas/tools/merge.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,7 @@ def _prepare_blocks(self):
992992
blockmaps = []
993993
for data in reindexed_data:
994994
data = data.consolidate()
995+
data._set_ref_locs()
995996
blockmaps.append(data.get_block_map(typ='dict'))
996997
return blockmaps, reindexed_data
997998

@@ -1063,7 +1064,10 @@ def _concat_blocks(self, blocks):
10631064
# or maybe would require performance test)
10641065
raise PandasError('dtypes are not consistent throughout '
10651066
'DataFrames')
1066-
return make_block(concat_values, blocks[0].items, self.new_axes[0])
1067+
return make_block(concat_values,
1068+
blocks[0].items,
1069+
self.new_axes[0],
1070+
placement=blocks[0]._ref_locs)
10671071
else:
10681072

10691073
offsets = np.r_[0, np.cumsum([len(x._data.axes[0]) for

pandas/tools/tests/test_merge.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1396,6 +1396,33 @@ def test_crossed_dtypes_weird_corner(self):
13961396
[df, df2], keys=['one', 'two'], names=['first', 'second'])
13971397
self.assertEqual(result.index.names, ('first', 'second'))
13981398

1399+
def test_dups_index(self):
1400+
# GH 4771
1401+
1402+
# single dtypes
1403+
df = DataFrame(np.random.randint(0,10,size=40).reshape(10,4),columns=['A','A','C','C'])
1404+
1405+
result = concat([df,df],axis=1)
1406+
assert_frame_equal(result.iloc[:,:4],df)
1407+
assert_frame_equal(result.iloc[:,4:],df)
1408+
1409+
result = concat([df,df],axis=0)
1410+
assert_frame_equal(result.iloc[:10],df)
1411+
assert_frame_equal(result.iloc[10:],df)
1412+
1413+
# multi dtypes
1414+
df = concat([DataFrame(np.random.randn(10,4),columns=['A','A','B','B']),
1415+
DataFrame(np.random.randint(0,10,size=20).reshape(10,2),columns=['A','C'])],
1416+
axis=1)
1417+
1418+
result = concat([df,df],axis=1)
1419+
assert_frame_equal(result.iloc[:,:6],df)
1420+
assert_frame_equal(result.iloc[:,6:],df)
1421+
1422+
result = concat([df,df],axis=0)
1423+
assert_frame_equal(result.iloc[:10],df)
1424+
assert_frame_equal(result.iloc[10:],df)
1425+
13991426
def test_handle_empty_objects(self):
14001427
df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
14011428

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy