[backport 2.3.x] Output formatting: preserve quoting for string categories (#61891) (#61966)

jorisvandenbossche · web-flow · commit c4fa611e713c · 2025-07-26T22:25:06.000+02:00
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2215,8 +2215,16 @@ def _repr_categories(self) -> list[str]:
         )
         from pandas.io.formats import format as fmt
 
+        formatter = None
+        if self.categories.dtype == "str":
+            # By default, format_array uses the extension array formatter with
+            # boxed=True; pass boxed=False so the output matches QUOTE_NONNUMERIC.
+            formatter = cast(ExtensionArray, self.categories._values)._formatter(
+                boxed=False
+            )
+
         format_array = partial(
-            fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC
+            fmt.format_array, formatter=formatter, quoting=QUOTE_NONNUMERIC
         )
         if len(self.categories) > max_categories:
             num = max_categories // 2
diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -19,16 +19,11 @@
 class TestCategoricalReprWithFactor:
     def test_print(self, using_infer_string):
         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
-        if using_infer_string:
-            expected = [
-                "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
-                "Categories (3, str): [a < b < c]",
-            ]
-        else:
-            expected = [
-                "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
-                "Categories (3, object): ['a' < 'b' < 'c']",
-            ]
+        dtype = "str" if using_infer_string else "object"
+        expected = [
+            "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
+            f"Categories (3, {dtype}): ['a' < 'b' < 'c']",
+        ]
         expected = "\n".join(expected)
         actual = repr(factor)
         assert actual == expected
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas._libs import index as libindex
 from pandas._libs.arrays import NDArrayBacked
 
@@ -196,7 +194,6 @@ def test_unique(self, data, categories, expected_data, ordered):
         expected = CategoricalIndex(expected_data, dtype=dtype)
         tm.assert_index_equal(idx.unique(), expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="repr doesn't roundtrip")
     def test_repr_roundtrip(self):
         ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
         str(ci)
diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py
@@ -318,38 +318,27 @@ def test_categorical_repr(self, using_infer_string):
         assert exp == a.__str__()
 
         a = Series(Categorical(["a", "b"] * 25))
+        exp = (
+            "0     a\n1     b\n"
+            "     ..\n"
+            "48    a\n49    b\n"
+            "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
+        )
         if using_infer_string:
-            exp = (
-                "0     a\n1     b\n"
-                "     ..\n"
-                "48    a\n49    b\n"
-                "Length: 50, dtype: category\nCategories (2, str): [a, b]"
-            )
-        else:
-            exp = (
-                "0     a\n1     b\n"
-                "     ..\n"
-                "48    a\n49    b\n"
-                "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
-            )
+            exp = exp.replace("object", "str")
         with option_context("display.max_rows", 5):
             assert exp == repr(a)
 
         levs = list("abcdefghijklmnopqrstuvwxyz")
         a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
+        exp = (
+            "0    a\n1    b\n"
+            "dtype: category\n"
+            "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
+            "'w' < 'x' < 'y' < 'z']"
+        )
         if using_infer_string:
-            exp = (
-                "0    a\n1    b\n"
-                "dtype: category\n"
-                "Categories (26, str): [a < b < c < d ... w < x < y < z]"
-            )
-        else:
-            exp = (
-                "0    a\n1    b\n"
-                "dtype: category\n"
-                "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
-                "'w' < 'x' < 'y' < 'z']"
-            )
+            exp = exp.replace("object", "str")
         assert exp == a.__str__()
 
     def test_categorical_series_repr(self):
diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
@@ -215,24 +215,15 @@ def test_series_equal_numeric_values_mismatch(rtol):
 
 
 def test_series_equal_categorical_values_mismatch(rtol, using_infer_string):
-    if using_infer_string:
-        msg = """Series are different
-
-Series values are different \\(66\\.66667 %\\)
-\\[index\\]: \\[0, 1, 2\\]
-\\[left\\]:  \\['a', 'b', 'c'\\]
-Categories \\(3, str\\): \\[a, b, c\\]
-\\[right\\]: \\['a', 'c', 'b'\\]
-Categories \\(3, str\\): \\[a, b, c\\]"""
-    else:
-        msg = """Series are different
+    dtype = "str" if using_infer_string else "object"
+    msg = f"""Series are different
 
 Series values are different \\(66\\.66667 %\\)
 \\[index\\]: \\[0, 1, 2\\]
 \\[left\\]:  \\['a', 'b', 'c'\\]
-Categories \\(3, object\\): \\['a', 'b', 'c'\\]
+Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]
 \\[right\\]: \\['a', 'c', 'b'\\]
-Categories \\(3, object\\): \\['a', 'b', 'c'\\]"""
+Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]"""
 
     s1 = Series(Categorical(["a", "b", "c"]))
     s2 = Series(Categorical(["a", "c", "b"]))