Skip to content

Commit c849d39

Browse files
Output formatting: preserve quoting for string categories (#61891)
1 parent e72c8a1 commit c849d39

File tree

6 files changed: 48 additions and 144 deletions.

pandas/core/arrays/categorical.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2233,8 +2233,16 @@ def _repr_categories(self) -> list[str]:
22332233
)
22342234
from pandas.io.formats import format as fmt
22352235

2236+
formatter = None
2237+
if self.categories.dtype == "str":
2238+
# the extension array formatter defaults to boxed=True in format_array
2239+
# override here to boxed=False to be consistent with QUOTE_NONNUMERIC
2240+
formatter = cast(ExtensionArray, self.categories._values)._formatter(
2241+
boxed=False
2242+
)
2243+
22362244
format_array = partial(
2237-
fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC
2245+
fmt.format_array, formatter=formatter, quoting=QUOTE_NONNUMERIC
22382246
)
22392247
if len(self.categories) > max_categories:
22402248
num = max_categories // 2

pandas/tests/arrays/categorical/test_repr.py

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,11 @@
1616
class TestCategoricalReprWithFactor:
1717
def test_print(self, using_infer_string):
1818
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
19-
if using_infer_string:
20-
expected = [
21-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
22-
"Categories (3, str): [a < b < c]",
23-
]
24-
else:
25-
expected = [
26-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
27-
"Categories (3, object): ['a' < 'b' < 'c']",
28-
]
19+
dtype = "str" if using_infer_string else "object"
20+
expected = [
21+
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
22+
f"Categories (3, {dtype}): ['a' < 'b' < 'c']",
23+
]
2924
expected = "\n".join(expected)
3025
actual = repr(factor)
3126
assert actual == expected
@@ -82,10 +77,7 @@ def test_unicode_print(self, using_infer_string):
8277
Categories (3, object): ['aaaaa', 'bb', 'cccc']"""
8378

8479
if using_infer_string:
85-
expected = expected.replace(
86-
"(3, object): ['aaaaa', 'bb', 'cccc']",
87-
"(3, str): [aaaaa, bb, cccc]",
88-
)
80+
expected = expected.replace("object", "str")
8981

9082
assert repr(c) == expected
9183

@@ -96,10 +88,7 @@ def test_unicode_print(self, using_infer_string):
9688
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
9789

9890
if using_infer_string:
99-
expected = expected.replace(
100-
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
101-
"(3, str): [ああああ, いいいいい, ううううううう]",
102-
)
91+
expected = expected.replace("object", "str")
10392

10493
assert repr(c) == expected
10594

@@ -112,12 +101,9 @@ def test_unicode_print(self, using_infer_string):
112101
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
113102

114103
if using_infer_string:
115-
expected = expected.replace(
116-
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
117-
"(3, str): [ああああ, いいいいい, ううううううう]",
118-
)
104+
expected = expected.replace("object", "str")
119105

120-
assert repr(c) == expected
106+
assert repr(c) == expected
121107

122108
def test_categorical_repr(self):
123109
c = Categorical([1, 2, 3])

pandas/tests/indexes/categorical/test_category.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas._libs import index as libindex
75
from pandas._libs.arrays import NDArrayBacked
86

@@ -199,8 +197,6 @@ def test_unique(self, data, categories, expected_data, ordered):
199197
expected = CategoricalIndex(expected_data, dtype=dtype)
200198
tm.assert_index_equal(idx.unique(), expected)
201199

202-
# TODO(3.0): remove this test once using_string_dtype() is always True
203-
@pytest.mark.xfail(using_string_dtype(), reason="repr doesn't roundtrip")
204200
def test_repr_roundtrip(self):
205201
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
206202
str(ci)

pandas/tests/indexes/categorical/test_formats.py

Lines changed: 12 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -8,125 +8,78 @@
88

99

1010
class TestCategoricalIndexReprStringCategories:
11-
def test_string_categorical_index_repr(self, using_infer_string):
11+
def test_string_categorical_index_repr(self):
1212
# short
1313
idx = CategoricalIndex(["a", "bb", "ccc"])
1414
expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
15-
if using_infer_string:
16-
expected = expected.replace(
17-
"categories=['a', 'bb', 'ccc']",
18-
"categories=[a, bb, ccc]",
19-
)
2015
assert repr(idx) == expected
2116

22-
def test_categorical_index_repr_multiline(self, using_infer_string):
17+
def test_categorical_index_repr_multiline(self):
2318
# multiple lines
2419
idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
2520
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
2621
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
2722
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
2823
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
29-
if using_infer_string:
30-
expected = expected.replace(
31-
"categories=['a', 'bb', 'ccc']",
32-
"categories=[a, bb, ccc]",
33-
)
3424
assert repr(idx) == expected
3525

36-
def test_categorical_index_repr_truncated(self, using_infer_string):
26+
def test_categorical_index_repr_truncated(self):
3727
# truncated
3828
idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
3929
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
4030
...
4131
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
4232
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa: E501
43-
if using_infer_string:
44-
expected = expected.replace(
45-
"categories=['a', 'bb', 'ccc']",
46-
"categories=[a, bb, ccc]",
47-
)
4833
assert repr(idx) == expected
4934

50-
def test_categorical_index_repr_many_categories(self, using_infer_string):
35+
def test_categorical_index_repr_many_categories(self):
5136
# larger categories
5237
idx = CategoricalIndex(list("abcdefghijklmmo"))
5338
expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
5439
'm', 'm', 'o'],
5540
categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')""" # noqa: E501
56-
if using_infer_string:
57-
expected = expected.replace(
58-
"categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o']",
59-
"categories=[a, b, c, d, ..., k, l, m, o]",
60-
)
6141
assert repr(idx) == expected
6242

63-
def test_categorical_index_repr_unicode(self, using_infer_string):
43+
def test_categorical_index_repr_unicode(self):
6444
# short
6545
idx = CategoricalIndex(["あ", "いい", "ううう"])
6646
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
67-
if using_infer_string:
68-
expected = expected.replace(
69-
"categories=['あ', 'いい', 'ううう']",
70-
"categories=[あ, いい, ううう]",
71-
)
7247
assert repr(idx) == expected
7348

74-
def test_categorical_index_repr_unicode_multiline(self, using_infer_string):
49+
def test_categorical_index_repr_unicode_multiline(self):
7550
# multiple lines
7651
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
7752
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
7853
'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
7954
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
8055
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
81-
if using_infer_string:
82-
expected = expected.replace(
83-
"categories=['あ', 'いい', 'ううう']",
84-
"categories=[あ, いい, ううう]",
85-
)
8656
assert repr(idx) == expected
8757

88-
def test_categorical_index_repr_unicode_truncated(self, using_infer_string):
58+
def test_categorical_index_repr_unicode_truncated(self):
8959
# truncated
9060
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
9161
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
9262
...
9363
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
9464
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
95-
if using_infer_string:
96-
expected = expected.replace(
97-
"categories=['あ', 'いい', 'ううう']",
98-
"categories=[あ, いい, ううう]",
99-
)
10065
assert repr(idx) == expected
10166

102-
def test_categorical_index_repr_unicode_many_categories(self, using_infer_string):
67+
def test_categorical_index_repr_unicode_many_categories(self):
10368
# larger categories
10469
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
10570
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
10671
'す', 'せ', 'そ'],
10772
categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
108-
if using_infer_string:
109-
expected = expected.replace(
110-
"categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ']",
111-
"categories=[あ, い, う, え, ..., し, す, せ, そ]",
112-
)
11373
assert repr(idx) == expected
11474

115-
def test_categorical_index_repr_east_asian_width(self, using_infer_string):
75+
def test_categorical_index_repr_east_asian_width(self):
11676
with cf.option_context("display.unicode.east_asian_width", True):
11777
# short
11878
idx = CategoricalIndex(["あ", "いい", "ううう"])
11979
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
120-
if using_infer_string:
121-
expected = expected.replace(
122-
"categories=['あ', 'いい', 'ううう']",
123-
"categories=[あ, いい, ううう]",
124-
)
12580
assert repr(idx) == expected
12681

127-
def test_categorical_index_repr_east_asian_width_multiline(
128-
self, using_infer_string
129-
):
82+
def test_categorical_index_repr_east_asian_width_multiline(self):
13083
with cf.option_context("display.unicode.east_asian_width", True):
13184
# multiple lines
13285
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
@@ -136,16 +89,9 @@ def test_categorical_index_repr_east_asian_width_multiline(
13689
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
13790
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
13891

139-
if using_infer_string:
140-
expected = expected.replace(
141-
"categories=['あ', 'いい', 'ううう']",
142-
"categories=[あ, いい, ううう]",
143-
)
14492
assert repr(idx) == expected
14593

146-
def test_categorical_index_repr_east_asian_width_truncated(
147-
self, using_infer_string
148-
):
94+
def test_categorical_index_repr_east_asian_width_truncated(self):
14995
with cf.option_context("display.unicode.east_asian_width", True):
15096
# truncated
15197
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
@@ -156,25 +102,13 @@ def test_categorical_index_repr_east_asian_width_truncated(
156102
'あ', 'いい', 'ううう'],
157103
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
158104

159-
if using_infer_string:
160-
expected = expected.replace(
161-
"categories=['あ', 'いい', 'ううう']",
162-
"categories=[あ, いい, ううう]",
163-
)
164105
assert repr(idx) == expected
165106

166-
def test_categorical_index_repr_east_asian_width_many_categories(
167-
self, using_infer_string
168-
):
107+
def test_categorical_index_repr_east_asian_width_many_categories(self):
169108
with cf.option_context("display.unicode.east_asian_width", True):
170109
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
171110
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
172111
'さ', 'し', 'す', 'せ', 'そ'],
173112
categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
174113

175-
if using_infer_string:
176-
expected = expected.replace(
177-
"categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ']",
178-
"categories=[あ, い, う, え, ..., し, す, せ, そ]",
179-
)
180114
assert repr(idx) == expected

pandas/tests/series/test_formats.py

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -309,38 +309,27 @@ def test_categorical_repr(self, using_infer_string):
309309
assert exp == a.__str__()
310310

311311
a = Series(Categorical(["a", "b"] * 25))
312+
exp = (
313+
"0 a\n1 b\n"
314+
" ..\n"
315+
"48 a\n49 b\n"
316+
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
317+
)
312318
if using_infer_string:
313-
exp = (
314-
"0 a\n1 b\n"
315-
" ..\n"
316-
"48 a\n49 b\n"
317-
"Length: 50, dtype: category\nCategories (2, str): [a, b]"
318-
)
319-
else:
320-
exp = (
321-
"0 a\n1 b\n"
322-
" ..\n"
323-
"48 a\n49 b\n"
324-
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
325-
)
319+
exp = exp.replace("object", "str")
326320
with option_context("display.max_rows", 5):
327321
assert exp == repr(a)
328322

329323
levs = list("abcdefghijklmnopqrstuvwxyz")
330324
a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
325+
exp = (
326+
"0 a\n1 b\n"
327+
"dtype: category\n"
328+
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
329+
"'w' < 'x' < 'y' < 'z']"
330+
)
331331
if using_infer_string:
332-
exp = (
333-
"0 a\n1 b\n"
334-
"dtype: category\n"
335-
"Categories (26, str): [a < b < c < d ... w < x < y < z]"
336-
)
337-
else:
338-
exp = (
339-
"0 a\n1 b\n"
340-
"dtype: category\n"
341-
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
342-
"'w' < 'x' < 'y' < 'z']"
343-
)
332+
exp = exp.replace("object", "str")
344333
assert exp == a.__str__()
345334

346335
def test_categorical_series_repr(self):

pandas/tests/util/test_assert_series_equal.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -214,24 +214,15 @@ def test_series_equal_numeric_values_mismatch(rtol):
214214

215215

216216
def test_series_equal_categorical_values_mismatch(rtol, using_infer_string):
217-
if using_infer_string:
218-
msg = """Series are different
219-
220-
Series values are different \\(66\\.66667 %\\)
221-
\\[index\\]: \\[0, 1, 2\\]
222-
\\[left\\]: \\['a', 'b', 'c'\\]
223-
Categories \\(3, str\\): \\[a, b, c\\]
224-
\\[right\\]: \\['a', 'c', 'b'\\]
225-
Categories \\(3, str\\): \\[a, b, c\\]"""
226-
else:
227-
msg = """Series are different
217+
dtype = "str" if using_infer_string else "object"
218+
msg = f"""Series are different
228219
229220
Series values are different \\(66\\.66667 %\\)
230221
\\[index\\]: \\[0, 1, 2\\]
231222
\\[left\\]: \\['a', 'b', 'c'\\]
232-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]
223+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]
233224
\\[right\\]: \\['a', 'c', 'b'\\]
234-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]"""
225+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]"""
235226

236227
s1 = Series(Categorical(["a", "b", "c"]))
237228
s2 = Series(Categorical(["a", "c", "b"]))

0 commit comments

Comments (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy