Skip to content

Commit c4fa611

Browse files
[backport 2.3.x] Output formatting: preserve quoting for string categories (#61891) (#61966)
1 parent 2063943 commit c4fa611

File tree

5 files changed: +32 -52 lines changed

pandas/core/arrays/categorical.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2215,8 +2215,16 @@ def _repr_categories(self) -> list[str]:
22152215
)
22162216
from pandas.io.formats import format as fmt
22172217

2218+
formatter = None
2219+
if self.categories.dtype == "str":
2220+
# the extension array formatter defaults to boxed=True in format_array
2221+
# override here to boxed=False to be consistent with QUOTE_NONNUMERIC
2222+
formatter = cast(ExtensionArray, self.categories._values)._formatter(
2223+
boxed=False
2224+
)
2225+
22182226
format_array = partial(
2219-
fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC
2227+
fmt.format_array, formatter=formatter, quoting=QUOTE_NONNUMERIC
22202228
)
22212229
if len(self.categories) > max_categories:
22222230
num = max_categories // 2

pandas/tests/arrays/categorical/test_repr.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -19,16 +19,11 @@
1919
class TestCategoricalReprWithFactor:
2020
def test_print(self, using_infer_string):
2121
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
22-
if using_infer_string:
23-
expected = [
24-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
25-
"Categories (3, str): [a < b < c]",
26-
]
27-
else:
28-
expected = [
29-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
30-
"Categories (3, object): ['a' < 'b' < 'c']",
31-
]
22+
dtype = "str" if using_infer_string else "object"
23+
expected = [
24+
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
25+
f"Categories (3, {dtype}): ['a' < 'b' < 'c']",
26+
]
3227
expected = "\n".join(expected)
3328
actual = repr(factor)
3429
assert actual == expected

pandas/tests/indexes/categorical/test_category.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas._libs import index as libindex
75
from pandas._libs.arrays import NDArrayBacked
86

@@ -196,7 +194,6 @@ def test_unique(self, data, categories, expected_data, ordered):
196194
expected = CategoricalIndex(expected_data, dtype=dtype)
197195
tm.assert_index_equal(idx.unique(), expected)
198196

199-
@pytest.mark.xfail(using_string_dtype(), reason="repr doesn't roundtrip")
200197
def test_repr_roundtrip(self):
201198
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
202199
str(ci)

pandas/tests/series/test_formats.py

Lines changed: 14 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -318,38 +318,27 @@ def test_categorical_repr(self, using_infer_string):
318318
assert exp == a.__str__()
319319

320320
a = Series(Categorical(["a", "b"] * 25))
321+
exp = (
322+
"0 a\n1 b\n"
323+
" ..\n"
324+
"48 a\n49 b\n"
325+
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
326+
)
321327
if using_infer_string:
322-
exp = (
323-
"0 a\n1 b\n"
324-
" ..\n"
325-
"48 a\n49 b\n"
326-
"Length: 50, dtype: category\nCategories (2, str): [a, b]"
327-
)
328-
else:
329-
exp = (
330-
"0 a\n1 b\n"
331-
" ..\n"
332-
"48 a\n49 b\n"
333-
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
334-
)
328+
exp = exp.replace("object", "str")
335329
with option_context("display.max_rows", 5):
336330
assert exp == repr(a)
337331

338332
levs = list("abcdefghijklmnopqrstuvwxyz")
339333
a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
334+
exp = (
335+
"0 a\n1 b\n"
336+
"dtype: category\n"
337+
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
338+
"'w' < 'x' < 'y' < 'z']"
339+
)
340340
if using_infer_string:
341-
exp = (
342-
"0 a\n1 b\n"
343-
"dtype: category\n"
344-
"Categories (26, str): [a < b < c < d ... w < x < y < z]"
345-
)
346-
else:
347-
exp = (
348-
"0 a\n1 b\n"
349-
"dtype: category\n"
350-
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
351-
"'w' < 'x' < 'y' < 'z']"
352-
)
341+
exp = exp.replace("object", "str")
353342
assert exp == a.__str__()
354343

355344
def test_categorical_series_repr(self):

pandas/tests/util/test_assert_series_equal.py

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -215,24 +215,15 @@ def test_series_equal_numeric_values_mismatch(rtol):
215215

216216

217217
def test_series_equal_categorical_values_mismatch(rtol, using_infer_string):
218-
if using_infer_string:
219-
msg = """Series are different
220-
221-
Series values are different \\(66\\.66667 %\\)
222-
\\[index\\]: \\[0, 1, 2\\]
223-
\\[left\\]: \\['a', 'b', 'c'\\]
224-
Categories \\(3, str\\): \\[a, b, c\\]
225-
\\[right\\]: \\['a', 'c', 'b'\\]
226-
Categories \\(3, str\\): \\[a, b, c\\]"""
227-
else:
228-
msg = """Series are different
218+
dtype = "str" if using_infer_string else "object"
219+
msg = f"""Series are different
229220
230221
Series values are different \\(66\\.66667 %\\)
231222
\\[index\\]: \\[0, 1, 2\\]
232223
\\[left\\]: \\['a', 'b', 'c'\\]
233-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]
224+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]
234225
\\[right\\]: \\['a', 'c', 'b'\\]
235-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]"""
226+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]"""
236227

237228
s1 = Series(Categorical(["a", "b", "c"]))
238229
s2 = Series(Categorical(["a", "c", "b"]))

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy