Skip to content

Commit b6a9309

Browse files
committed
BUG: Fix value_counts name handling
1 parent 3908ad5 commit b6a9309

File tree

5 files changed

+111
-84
lines changed

5 files changed

+111
-84
lines changed

doc/source/whatsnew/v0.17.0.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Performance Improvements
7373

7474
Bug Fixes
7575
~~~~~~~~~
76+
7677
- Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
7778

7879

@@ -100,3 +101,6 @@ Bug Fixes
100101

101102

102103
- Bug that caused segfault when resampling an empty Series (:issue:`10228`)
104+
- Bug in ``DatetimeIndex.value_counts`` and ``PeriodIndex.value_counts`` where the name was dropped from the resulting ``Series`` but retained on the result's ``Index``. (:issue:`10150`)
105+
106+

pandas/core/algorithms.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
202202
from pandas.tools.tile import cut
203203
from pandas.tseries.period import PeriodIndex
204204

205+
name = getattr(values, 'name', None)
205206
values = Series(values).values
206207

207208
if bins is not None:
@@ -222,7 +223,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
222223
if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
223224

224225
if is_period:
225-
values = PeriodIndex(values)
226+
values = PeriodIndex(values, name=name)
226227

227228
values = values.view(np.int64)
228229
keys, counts = htable.value_count_int64(values)
@@ -247,7 +248,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
247248
keys = np.insert(keys, 0, np.NaN)
248249
counts = np.insert(counts, 0, mask.sum())
249250

250-
result = Series(counts, index=com._values_from_object(keys))
251+
result = Series(counts, index=com._values_from_object(keys), name=name)
251252

252253
if bins is not None:
253254
# TODO: This next line should be more efficient

pandas/core/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,10 +431,10 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
431431

432432
if isinstance(self, PeriodIndex):
433433
# preserve freq
434-
result.index = self._simple_new(result.index.values, self.name,
434+
result.index = self._simple_new(result.index.values,
435435
freq=self.freq)
436436
elif isinstance(self, DatetimeIndex):
437-
result.index = self._simple_new(result.index.values, self.name,
437+
result.index = self._simple_new(result.index.values,
438438
tz=getattr(self, 'tz', None))
439439
return result
440440

pandas/tests/test_base.py

Lines changed: 72 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -181,23 +181,24 @@ def f():
181181

182182
class Ops(tm.TestCase):
183183
def setUp(self):
184-
self.bool_index = tm.makeBoolIndex(10)
185-
self.int_index = tm.makeIntIndex(10)
186-
self.float_index = tm.makeFloatIndex(10)
187-
self.dt_index = tm.makeDateIndex(10)
188-
self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern')
189-
self.period_index = tm.makePeriodIndex(10)
190-
self.string_index = tm.makeStringIndex(10)
184+
self.bool_index = tm.makeBoolIndex(10, name='a')
185+
self.int_index = tm.makeIntIndex(10, name='a')
186+
self.float_index = tm.makeFloatIndex(10, name='a')
187+
self.dt_index = tm.makeDateIndex(10, name='a')
188+
self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern')
189+
self.period_index = tm.makePeriodIndex(10, name='a')
190+
self.string_index = tm.makeStringIndex(10, name='a')
191+
self.unicode_index = tm.makeUnicodeIndex(10, name='a')
191192

192193
arr = np.random.randn(10)
193-
self.int_series = Series(arr, index=self.int_index)
194-
self.float_series = Series(arr, index=self.float_index)
195-
self.dt_series = Series(arr, index=self.dt_index)
194+
self.int_series = Series(arr, index=self.int_index, name='a')
195+
self.float_series = Series(arr, index=self.float_index, name='a')
196+
self.dt_series = Series(arr, index=self.dt_index, name='a')
196197
self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
197-
self.period_series = Series(arr, index=self.period_index)
198-
self.string_series = Series(arr, index=self.string_index)
198+
self.period_series = Series(arr, index=self.period_index, name='a')
199+
self.string_series = Series(arr, index=self.string_index, name='a')
199200

200-
types = ['bool','int','float','dt', 'dt_tz', 'period','string']
201+
types = ['bool','int','float','dt', 'dt_tz', 'period','string', 'unicode']
201202
fmts = [ "{0}_{1}".format(t,f) for t in types for f in ['index','series'] ]
202203
self.objs = [ getattr(self,f) for f in fmts if getattr(self,f,None) is not None ]
203204

@@ -213,9 +214,9 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False):
213214

214215
try:
215216
if isinstance(o, Series):
216-
expected = Series(getattr(o.index,op),index=o.index)
217+
expected = Series(getattr(o.index,op), index=o.index, name='a')
217218
else:
218-
expected = getattr(o,op)
219+
expected = getattr(o, op)
219220
except (AttributeError):
220221
if ignore_failures:
221222
continue
@@ -361,21 +362,28 @@ def test_value_counts_unique_nunique(self):
361362
# create repeated values, 'n'th element is repeated by n+1 times
362363
if isinstance(o, PeriodIndex):
363364
# freq must be specified because repeat makes freq ambiguous
364-
expected_index = o[::-1]
365-
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
365+
366+
# resets name from Index
367+
expected_index = pd.Index(o[::-1], name=None)
368+
369+
# attach name to klass
370+
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq, name='a')
366371
# don't test boolean
367372
elif isinstance(o,Index) and o.is_boolean():
368373
continue
369374
elif isinstance(o, Index):
370-
expected_index = values[::-1]
371-
o = klass(np.repeat(values, range(1, len(o) + 1)))
375+
expected_index = pd.Index(values[::-1], name=None)
376+
o = klass(np.repeat(values, range(1, len(o) + 1)), name='a')
372377
else:
373-
expected_index = values[::-1]
378+
expected_index = pd.Index(values[::-1], name=None)
374379
idx = np.repeat(o.index.values, range(1, len(o) + 1))
375-
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx)
380+
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx, name='a')
376381

377-
expected_s = Series(range(10, 0, -1), index=expected_index, dtype='int64')
378-
tm.assert_series_equal(o.value_counts(), expected_s)
382+
expected_s = Series(range(10, 0, -1), index=expected_index, dtype='int64', name='a')
383+
result = o.value_counts()
384+
tm.assert_series_equal(result, expected_s)
385+
self.assertTrue(result.index.name is None)
386+
self.assertEqual(result.name, 'a')
379387

380388
result = o.unique()
381389
if isinstance(o, (DatetimeIndex, PeriodIndex)):
@@ -410,21 +418,34 @@ def test_value_counts_unique_nunique(self):
410418
# create repeated values, 'n'th element is repeated by n+1 times
411419
if isinstance(o, PeriodIndex):
412420
# freq must be specified because repeat makes freq ambiguous
413-
expected_index = o
414-
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
421+
422+
# resets name from Index
423+
expected_index = pd.Index(o, name=None)
424+
# attach name to klass
425+
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq, name='a')
415426
elif isinstance(o, Index):
416-
expected_index = values
417-
o = klass(np.repeat(values, range(1, len(o) + 1)))
427+
expected_index = pd.Index(values, name=None)
428+
o = klass(np.repeat(values, range(1, len(o) + 1)), name='a')
418429
else:
419-
expected_index = values
430+
expected_index = pd.Index(values, name=None)
420431
idx = np.repeat(o.index.values, range(1, len(o) + 1))
421-
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx)
422-
423-
expected_s_na = Series(list(range(10, 2, -1)) +[3], index=expected_index[9:0:-1], dtype='int64')
424-
expected_s = Series(list(range(10, 2, -1)), index=expected_index[9:1:-1], dtype='int64')
425-
426-
tm.assert_series_equal(o.value_counts(dropna=False), expected_s_na)
432+
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx, name='a')
433+
434+
expected_s_na = Series(list(range(10, 2, -1)) +[3],
435+
index=expected_index[9:0:-1],
436+
dtype='int64', name='a')
437+
expected_s = Series(list(range(10, 2, -1)),
438+
index=expected_index[9:1:-1],
439+
dtype='int64', name='a')
440+
441+
result_s_na = o.value_counts(dropna=False)
442+
tm.assert_series_equal(result_s_na, expected_s_na)
443+
self.assertTrue(result_s_na.index.name is None)
444+
self.assertEqual(result_s_na.name, 'a')
445+
result_s = o.value_counts()
427446
tm.assert_series_equal(o.value_counts(), expected_s)
447+
self.assertTrue(result_s.index.name is None)
448+
self.assertEqual(result_s.name, 'a')
428449

429450
# numpy_array_equal cannot compare arrays includes nan
430451
result = o.unique()
@@ -508,14 +529,15 @@ def test_value_counts_inferred(self):
508529
df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"],
509530
parse_dates=["dt"])
510531

511-
s = klass(df['dt'].copy())
532+
s = klass(df['dt'].copy(), name='dt')
512533

513-
idx = pd.to_datetime(['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', '2009-01-01 00:00:00X'])
514-
expected_s = Series([3, 2, 1], index=idx)
534+
idx = pd.to_datetime(['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
535+
'2009-01-01 00:00:00X'])
536+
expected_s = Series([3, 2, 1], index=idx, name='dt')
515537
tm.assert_series_equal(s.value_counts(), expected_s)
516538

517-
expected = np.array(['2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', '2008-09-09 00:00:00Z'],
518-
dtype='datetime64[ns]')
539+
expected = np.array(['2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
540+
'2008-09-09 00:00:00Z'], dtype='datetime64[ns]')
519541
if isinstance(s, DatetimeIndex):
520542
expected = DatetimeIndex(expected)
521543
self.assertTrue(s.unique().equals(expected))
@@ -526,7 +548,7 @@ def test_value_counts_inferred(self):
526548

527549
# with NaT
528550
s = df['dt'].copy()
529-
s = klass([v for v in s.values] + [pd.NaT])
551+
s = klass([v for v in s.values] + [pd.NaT], name='dt')
530552

531553
result = s.value_counts()
532554
self.assertEqual(result.index.dtype, 'datetime64[ns]')
@@ -547,10 +569,10 @@ def test_value_counts_inferred(self):
547569

548570
# timedelta64[ns]
549571
td = df.dt - df.dt + timedelta(1)
550-
td = klass(td)
572+
td = klass(td, name='dt')
551573

552574
result = td.value_counts()
553-
expected_s = Series([6], index=[Timedelta('1day')])
575+
expected_s = Series([6], index=[Timedelta('1day')], name='dt')
554576
tm.assert_series_equal(result, expected_s)
555577

556578
expected = TimedeltaIndex(['1 days'])
@@ -560,9 +582,8 @@ def test_value_counts_inferred(self):
560582
self.assert_numpy_array_equal(td.unique(), expected.values)
561583

562584
td2 = timedelta(1) + (df.dt - df.dt)
563-
td2 = klass(td2)
585+
td2 = klass(td2, name='dt')
564586
result2 = td2.value_counts()
565-
566587
tm.assert_series_equal(result2, expected_s)
567588

568589
def test_factorize(self):
@@ -629,7 +650,7 @@ def test_duplicated_drop_duplicates(self):
629650
# special case
630651
if original.is_boolean():
631652
result = original.drop_duplicates()
632-
expected = Index([False,True])
653+
expected = Index([False,True], name='a')
633654
tm.assert_index_equal(result, expected)
634655
continue
635656

@@ -668,25 +689,26 @@ def test_duplicated_drop_duplicates(self):
668689
idx.drop_duplicates(inplace=True)
669690

670691
else:
671-
expected = Series([False] * len(original), index=original.index)
692+
expected = Series([False] * len(original),
693+
index=original.index, name='a')
672694
tm.assert_series_equal(original.duplicated(), expected)
673695
result = original.drop_duplicates()
674696
tm.assert_series_equal(result, original)
675697
self.assertFalse(result is original)
676698

677699
idx = original.index[list(range(len(original))) + [5, 3]]
678700
values = original.values[list(range(len(original))) + [5, 3]]
679-
s = Series(values, index=idx)
701+
s = Series(values, index=idx, name='a')
680702

681-
expected = Series([False] * len(original) + [True, True], index=idx)
703+
expected = Series([False] * len(original) + [True, True],
704+
index=idx, name='a')
682705
tm.assert_series_equal(s.duplicated(), expected)
683706
tm.assert_series_equal(s.drop_duplicates(), original)
684707

685708
last_base = [False] * len(idx)
686709
last_base[3] = True
687710
last_base[5] = True
688-
expected = Series(last_base, index=idx)
689-
expected
711+
expected = Series(last_base, index=idx, name='a')
690712
tm.assert_series_equal(s.duplicated(take_last=True), expected)
691713
tm.assert_series_equal(s.drop_duplicates(take_last=True),
692714
s[~np.array(last_base)])

pandas/util/testing.py

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -817,43 +817,43 @@ def getArangeMat():
817817

818818

819819
# make index
820-
def makeStringIndex(k=10):
821-
return Index(rands_array(nchars=10, size=k))
820+
def makeStringIndex(k=10, name=None):
821+
return Index(rands_array(nchars=10, size=k), name=name)
822822

823823

824-
def makeUnicodeIndex(k=10):
824+
def makeUnicodeIndex(k=10, name=None):
825825
return Index(randu_array(nchars=10, size=k))
826826

827-
def makeCategoricalIndex(k=10, n=3):
827+
def makeCategoricalIndex(k=10, n=3, name=None):
828828
""" make a length k index or n categories """
829829
x = rands_array(nchars=4, size=n)
830-
return CategoricalIndex(np.random.choice(x,k))
830+
return CategoricalIndex(np.random.choice(x,k), name=name)
831831

832-
def makeBoolIndex(k=10):
832+
def makeBoolIndex(k=10, name=None):
833833
if k == 1:
834-
return Index([True])
834+
return Index([True], name=name)
835835
elif k == 2:
836-
return Index([False,True])
837-
return Index([False,True] + [False]*(k-2))
836+
return Index([False,True], name=name)
837+
return Index([False,True] + [False]*(k-2), name=name)
838838

839-
def makeIntIndex(k=10):
840-
return Index(lrange(k))
839+
def makeIntIndex(k=10, name=None):
840+
return Index(lrange(k), name=name)
841841

842-
def makeFloatIndex(k=10):
842+
def makeFloatIndex(k=10, name=None):
843843
values = sorted(np.random.random_sample(k)) - np.random.random_sample(1)
844-
return Index(values * (10 ** np.random.randint(0, 9)))
844+
return Index(values * (10 ** np.random.randint(0, 9)), name=name)
845845

846-
def makeDateIndex(k=10, freq='B'):
846+
def makeDateIndex(k=10, freq='B', name=None):
847847
dt = datetime(2000, 1, 1)
848-
dr = bdate_range(dt, periods=k, freq=freq)
849-
return DatetimeIndex(dr)
848+
dr = bdate_range(dt, periods=k, freq=freq, name=name)
849+
return DatetimeIndex(dr, name=name)
850850

851-
def makeTimedeltaIndex(k=10, freq='D'):
852-
return TimedeltaIndex(start='1 day',periods=k,freq=freq)
851+
def makeTimedeltaIndex(k=10, freq='D', name=None):
852+
return TimedeltaIndex(start='1 day', periods=k, freq=freq, name=name)
853853

854-
def makePeriodIndex(k=10):
854+
def makePeriodIndex(k=10, name=None):
855855
dt = datetime(2000, 1, 1)
856-
dr = PeriodIndex(start=dt, periods=k, freq='B')
856+
dr = PeriodIndex(start=dt, periods=k, freq='B', name=name)
857857
return dr
858858

859859
def all_index_generator(k=10):
@@ -885,38 +885,38 @@ def all_timeseries_index_generator(k=10):
885885

886886

887887
# make series
888-
def makeFloatSeries():
888+
def makeFloatSeries(name=None):
889889
index = makeStringIndex(N)
890-
return Series(randn(N), index=index)
890+
return Series(randn(N), index=index, name=name)
891891

892892

893-
def makeStringSeries():
893+
def makeStringSeries(name=None):
894894
index = makeStringIndex(N)
895-
return Series(randn(N), index=index)
895+
return Series(randn(N), index=index, name=name)
896896

897897

898-
def makeObjectSeries():
898+
def makeObjectSeries(name=None):
899899
dateIndex = makeDateIndex(N)
900900
dateIndex = Index(dateIndex, dtype=object)
901901
index = makeStringIndex(N)
902-
return Series(dateIndex, index=index)
902+
return Series(dateIndex, index=index, name=name)
903903

904904

905905
def getSeriesData():
906906
index = makeStringIndex(N)
907907
return dict((c, Series(randn(N), index=index)) for c in getCols(K))
908908

909909

910-
def makeTimeSeries(nper=None, freq='B'):
910+
def makeTimeSeries(nper=None, freq='B', name=None):
911911
if nper is None:
912912
nper = N
913-
return Series(randn(nper), index=makeDateIndex(nper, freq=freq))
913+
return Series(randn(nper), index=makeDateIndex(nper, freq=freq), name=name)
914914

915915

916-
def makePeriodSeries(nper=None):
916+
def makePeriodSeries(nper=None, name=None):
917917
if nper is None:
918918
nper = N
919-
return Series(randn(nper), index=makePeriodIndex(nper))
919+
return Series(randn(nper), index=makePeriodIndex(nper), name=name)
920920

921921

922922
def getTimeSeriesData(nper=None, freq='B'):

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy