Skip to content

Commit 22f04f7

Browse files
committed
Merge pull request pandas-dev#4766 from jreback/ix_assign
BUG: Bug in setting with loc/ix a single indexer on a multi-index axis and a listlike (related to GH3777)
2 parents 84ca068 + b057202 commit 22f04f7

File tree

4 files changed

+138
-25
lines changed

4 files changed

+138
-25
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
329329
- Bug with Series indexing not raising an error when the right-hand-side has an incorrect length (:issue:`2702`)
330330
- Bug in multi-indexing with a partial string selection as one part of a MultiIndex (:issue:`4758`)
331331
- Bug with reindexing on the index with a non-unique index will now raise ``ValueError`` (:issue:`4746`)
332+
- Bug in setting with ``loc/ix`` a single indexer with a multi-index axis and a numpy array, related to (:issue:`3777`)
332333

333334
pandas 0.12
334335
===========

pandas/core/indexing.py

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ def _setitem_with_indexer(self, indexer, value):
163163
labels = _safe_append_to_index(index, key)
164164
self.obj._data = self.obj.reindex_axis(labels,i)._data
165165

166+
if isinstance(labels,MultiIndex):
167+
self.obj.sortlevel(inplace=True)
168+
labels = self.obj._get_axis(i)
169+
166170
nindexer.append(labels.get_loc(key))
167171

168172
else:
@@ -198,33 +202,77 @@ def _setitem_with_indexer(self, indexer, value):
198202
elif self.ndim >= 3:
199203
return self.obj.__setitem__(indexer,value)
200204

205+
# set
206+
info_axis = self.obj._info_axis_number
207+
item_labels = self.obj._get_axis(info_axis)
208+
209+
# if we have a complicated setup, take the split path
210+
if isinstance(indexer, tuple) and any([ isinstance(ax,MultiIndex) for ax in self.obj.axes ]):
211+
take_split_path = True
212+
201213
# align and set the values
202214
if take_split_path:
215+
203216
if not isinstance(indexer, tuple):
204217
indexer = self._tuplify(indexer)
205218

206219
if isinstance(value, ABCSeries):
207220
value = self._align_series(indexer, value)
208221

209-
info_axis = self.obj._info_axis_number
210222
info_idx = indexer[info_axis]
211-
212223
if com.is_integer(info_idx):
213224
info_idx = [info_idx]
225+
labels = item_labels[info_idx]
226+
227+
# if we have a partial multiindex, then need to adjust the plane indexer here
228+
if len(labels) == 1 and isinstance(self.obj[labels[0]].index,MultiIndex):
229+
index = self.obj[labels[0]].index
230+
idx = indexer[:info_axis][0]
231+
try:
232+
if idx in index:
233+
idx = index.get_loc(idx)
234+
except:
235+
pass
236+
plane_indexer = tuple([idx]) + indexer[info_axis + 1:]
237+
lplane_indexer = _length_of_indexer(plane_indexer[0],index)
214238

215-
plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
216-
item_labels = self.obj._get_axis(info_axis)
239+
if is_list_like(value) and lplane_indexer != len(value):
240+
raise ValueError("cannot set using a multi-index selection indexer with a different length than the value")
241+
242+
# non-mi
243+
else:
244+
plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:]
245+
if info_axis > 0:
246+
plane_axis = self.obj.axes[:info_axis][0]
247+
lplane_indexer = _length_of_indexer(plane_indexer[0],plane_axis)
248+
else:
249+
lplane_indexer = 0
217250

218251
def setter(item, v):
219252
s = self.obj[item]
220-
pi = plane_indexer[0] if len(plane_indexer) == 1 else plane_indexer
253+
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
221254

222255
# set the item, possibly having a dtype change
223256
s = s.copy()
224257
s._data = s._data.setitem(pi,v)
225258
self.obj[item] = s
226259

227-
labels = item_labels[info_idx]
260+
def can_do_equal_len():
261+
""" return True if we have an equal len settable """
262+
if not len(labels) == 1:
263+
return False
264+
265+
l = len(value)
266+
item = labels[0]
267+
index = self.obj[item].index
268+
269+
# equal len list/ndarray
270+
if len(index) == l:
271+
return True
272+
elif lplane_indexer == l:
273+
return True
274+
275+
return False
228276

229277
if _is_list_like(value):
230278

@@ -251,8 +299,7 @@ def setter(item, v):
251299
setter(item, value[:,i])
252300

253301
# we have an equal len list/ndarray
254-
elif len(labels) == 1 and (
255-
len(self.obj[labels[0]]) == len(value) or len(plane_indexer[0]) == len(value)):
302+
elif can_do_equal_len():
256303
setter(labels[0], value)
257304

258305
# per label values
@@ -1104,6 +1151,31 @@ def _convert_key(self, key):
11041151
# 32-bit floating point machine epsilon
11051152
_eps = np.finfo('f4').eps
11061153

1154+
def _length_of_indexer(indexer,target=None):
1155+
""" return the length of a single non-tuple indexer which could be a slice """
1156+
if target is not None and isinstance(indexer, slice):
1157+
l = len(target)
1158+
start = indexer.start
1159+
stop = indexer.stop
1160+
step = indexer.step
1161+
if start is None:
1162+
start = 0
1163+
elif start < 0:
1164+
start += l
1165+
if stop is None or stop > l:
1166+
stop = l
1167+
elif stop < 0:
1168+
stop += l
1169+
if step is None:
1170+
step = 1
1171+
elif step < 0:
1172+
step = abs(step)
1173+
return (stop-start) / step
1174+
elif isinstance(indexer, (ABCSeries, np.ndarray, list)):
1175+
return len(indexer)
1176+
elif not is_list_like(indexer):
1177+
return 1
1178+
raise AssertionError("cannot find the length of the indexer")
11071179

11081180
def _convert_to_index_sliceable(obj, key):
11091181
""" if we are index sliceable, then return my slicer, otherwise return None """

pandas/core/internals.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
is_list_like, _infer_dtype_from_scalar)
1313
from pandas.core.index import (Index, MultiIndex, _ensure_index,
1414
_handle_legacy_indexes)
15-
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
15+
from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices,
16+
_length_of_indexer)
1617
import pandas.core.common as com
1718
from pandas.sparse.array import _maybe_to_sparse, SparseArray
1819
import pandas.lib as lib
@@ -563,22 +564,7 @@ def setitem(self, indexer, value):
563564
elif isinstance(indexer, slice):
564565

565566
if is_list_like(value) and l:
566-
start = indexer.start
567-
stop = indexer.stop
568-
step = indexer.step
569-
if start is None:
570-
start = 0
571-
elif start < 0:
572-
start += l
573-
if stop is None or stop > l:
574-
stop = len(values)
575-
elif stop < 0:
576-
stop += l
577-
if step is None:
578-
step = 1
579-
elif step < 0:
580-
step = abs(step)
581-
if (stop-start) / step != len(value):
567+
if len(value) != _length_of_indexer(indexer, values):
582568
raise ValueError("cannot set using a slice indexer with a different length than the value")
583569

584570
try:

pandas/tests/test_indexing.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,60 @@ def f():
917917
#result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']]
918918
#tm.assert_panel_equal(result,expected)
919919

920+
def test_multiindex_assignment(self):
921+
922+
# GH3777 part 2
923+
924+
# mixed dtype
925+
df = DataFrame(np.random.randint(5,10,size=9).reshape(3, 3),
926+
columns=list('abc'),
927+
index=[[4,4,8],[8,10,12]])
928+
df['d'] = np.nan
929+
arr = np.array([0.,1.])
930+
931+
df.ix[4,'d'] = arr
932+
assert_series_equal(df.ix[4,'d'],Series(arr,index=[8,10],name='d'))
933+
934+
# single dtype
935+
df = DataFrame(np.random.randint(5,10,size=9).reshape(3, 3),
936+
columns=list('abc'),
937+
index=[[4,4,8],[8,10,12]])
938+
939+
df.ix[4,'c'] = arr
940+
assert_series_equal(df.ix[4,'c'],Series(arr,index=[8,10],name='c',dtype='int64'))
941+
942+
# scalar ok
943+
df.ix[4,'c'] = 10
944+
assert_series_equal(df.ix[4,'c'],Series(10,index=[8,10],name='c',dtype='int64'))
945+
946+
# invalid assignments
947+
def f():
948+
df.ix[4,'c'] = [0,1,2,3]
949+
self.assertRaises(ValueError, f)
950+
951+
def f():
952+
df.ix[4,'c'] = [0]
953+
self.assertRaises(ValueError, f)
954+
955+
# groupby example
956+
NUM_ROWS = 100
957+
NUM_COLS = 10
958+
col_names = ['A'+num for num in map(str,np.arange(NUM_COLS).tolist())]
959+
index_cols = col_names[:5]
960+
df = DataFrame(np.random.randint(5, size=(NUM_ROWS,NUM_COLS)), dtype=np.int64, columns=col_names)
961+
df = df.set_index(index_cols).sort_index()
962+
grp = df.groupby(level=index_cols[:4])
963+
df['new_col'] = np.nan
964+
965+
f_index = np.arange(5)
966+
def f(name,df2):
967+
return Series(np.arange(df2.shape[0]),name=df2.index.values[0]).reindex(f_index)
968+
new_df = pd.concat([ f(name,df2) for name, df2 in grp ],axis=1).T
969+
970+
for name, df2 in grp:
971+
new_vals = np.arange(df2.shape[0])
972+
df.ix[name, 'new_col'] = new_vals
973+
920974
def test_multi_assign(self):
921975

922976
# GH 3626, an assignment of a sub-df to a df

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy