Skip to content

Commit 5f545f3

Browse files
committed
Update documentation for sort_values and natural sorting
1 parent e4a03b6 commit 5f545f3

File tree

2 files changed

+78
-37
lines changed

2 files changed

+78
-37
lines changed

pandas/core/frame.py

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7173,35 +7173,54 @@ def sort_values(
71737173
`natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
71747174
This can be done using
71757175
``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
7176-
which provides sorted indices according
7177-
to their natural order, as shown below:
7176+
which provides a function to generate a key
7177+
to sort data in their natural order:
71787178
71797179
>>> df = pd.DataFrame(
71807180
... {
7181-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
7182-
... "value": [10, 20, 30, 40, 50],
7181+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
7182+
... "mins": [
7183+
... "10mins",
7184+
... "40mins",
7185+
... "40mins",
7186+
... "40mins",
7187+
... "10mins",
7188+
... "10mins",
7189+
... ],
7190+
... "value": [10, 20, 30, 40, 50, 60],
71837191
... }
71847192
... )
71857193
>>> df
7186-
time value
7187-
0 0hr 10
7188-
1 128hr 20
7189-
2 72hr 30
7190-
3 48hr 40
7191-
4 96hr 50
7192-
>>> from natsort import index_natsorted
7193-
>>> index_natsorted(df["time"])
7194-
[0, 3, 2, 4, 1]
7194+
hours mins value
7195+
0 0hr 10mins 10
7196+
1 128hr 40mins 20
7197+
2 0hr 40mins 30
7198+
3 64hr 40mins 40
7199+
4 64hr 10mins 50
7200+
5 128hr 10mins 60
7201+
>>> from natsort import natsort_keygen
7202+
>>> natsort_keygen()(df["hours"])
7203+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
7204+
>>> natsort_keygen()(df["mins"])
7205+
(
7206+
('', 10, 'mins'),
7207+
('', 40, 'mins'),
7208+
('', 40, 'mins'),
7209+
('', 40, 'mins'),
7210+
('', 10, 'mins'),
7211+
('', 10, 'mins'),
7212+
)
71957213
>>> df.sort_values(
7196-
... by="time",
7197-
... key=lambda x: np.argsort(index_natsorted(x)),
7214+
... by=["hours", "mins"],
7215+
... key=natsort_keygen(),
71987216
... )
7199-
time value
7200-
0 0hr 10
7201-
3 48hr 40
7202-
2 72hr 30
7203-
4 96hr 50
7204-
1 128hr 20
7217+
hours mins value
7218+
0 0hr 10mins 10
7219+
2 0hr 40mins 30
7220+
4 64hr 10mins 50
7221+
3 64hr 40mins 40
7222+
5 128hr 10mins 60
7223+
1 128hr 40mins 20
72057224
"""
72067225
inplace = validate_bool_kwarg(inplace, "inplace")
72077226
axis = self._get_axis_number(axis)

pandas/core/generic.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5004,27 +5004,49 @@ def sort_values(
50045004
50055005
>>> df = pd.DataFrame(
50065006
... {
5007-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
5008-
... "value": [10, 20, 30, 40, 50],
5007+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
5008+
... "mins": [
5009+
... "10mins",
5010+
... "40mins",
5011+
... "40mins",
5012+
... "40mins",
5013+
... "10mins",
5014+
... "10mins",
5015+
... ],
5016+
... "value": [10, 20, 30, 40, 50, 60],
50095017
... }
50105018
... )
50115019
>>> df
5012-
time value
5013-
0 0hr 10
5014-
1 128hr 20
5015-
2 72hr 30
5016-
3 48hr 40
5017-
4 96hr 50
5018-
>>> from natsort import index_natsorted
5020+
hours mins value
5021+
0 0hr 10mins 10
5022+
1 128hr 40mins 20
5023+
2 0hr 40mins 30
5024+
3 64hr 40mins 40
5025+
4 64hr 10mins 50
5026+
5 128hr 10mins 60
5027+
>>> from natsort import natsort_keygen
5028+
>>> natsort_keygen()(df["hours"])
5029+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
5030+
>>> natsort_keygen()(df["mins"])
5031+
(
5032+
('', 10, 'mins'),
5033+
('', 40, 'mins'),
5034+
('', 40, 'mins'),
5035+
('', 40, 'mins'),
5036+
('', 10, 'mins'),
5037+
('', 10, 'mins'),
5038+
)
50195039
>>> df.sort_values(
5020-
... by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
5040+
... by=["hours", "mins"],
5041+
... key=natsort_keygen(),
50215042
... )
5022-
time value
5023-
0 0hr 10
5024-
3 48hr 40
5025-
2 72hr 30
5026-
4 96hr 50
5027-
1 128hr 20
5043+
hours mins value
5044+
0 0hr 10mins 10
5045+
2 0hr 40mins 30
5046+
4 64hr 10mins 50
5047+
3 64hr 40mins 40
5048+
5 128hr 10mins 60
5049+
1 128hr 40mins 20
50285050
"""
50295051
raise AbstractMethodError(self)
50305052

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy