python · lysnikolaou · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
@@ -307,6 +307,66 @@ These APIs can be used for fast direct character conversions:
    possible.  This function does not raise exceptions.
 
 
+.. c:function:: Py_ssize_t PyUCS4_ToLower(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
+
+   Convert *ch* to lower case, store the result in *buffer*, which must be
+   able to hold as many characters as needed for *ch* to be lower cased, and
+   return the number of characters stored. If the result does not fit in
+   *size* characters, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   In Unicode 16.0, any character can be lowercased into a buffer of *size* ``2``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+
+.. c:function:: Py_ssize_t PyUCS4_ToUpper(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
+
+   Convert *ch* to upper case, store the result in *buffer*, which must be
+   able to hold as many characters as needed for *ch* to be upper cased, and
+   return the number of characters stored. If the result does not fit in
+   *size* characters, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   In Unicode 16.0, any character can be uppercased into a buffer of *size* ``3``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+
+.. c:function:: Py_ssize_t PyUCS4_ToTitle(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
+
+   Convert *ch* to title case, store the result in *buffer*, which must be
+   able to hold as many characters as needed for *ch* to be title cased, and
+   return the number of characters stored. If the result does not fit in
+   *size* characters, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   In Unicode 16.0, any character can be titlecased into a buffer of *size* ``3``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+
+.. c:function:: Py_ssize_t PyUCS4_ToFolded(Py_UCS4 ch, Py_UCS4 *buffer, Py_ssize_t size)
+
+   Foldcase *ch*, store the result in *buffer*, which must be
+   able to hold as many characters as needed for *ch* to be foldcased, and
+   return the number of characters stored. If the result does not fit in
+   *size* characters, a :exc:`ValueError` is raised and ``-1`` is returned.
+
+   In Unicode 16.0, any character can be foldcased into a buffer of *size* ``3``.
+   See also :c:macro:`PyUCS4_CASE_CONVERSION_BUFFER_SIZE`.
+
+   .. versionadded:: next
+
+.. c:macro:: PyUCS4_CASE_CONVERSION_BUFFER_SIZE
+
+   The minimum buffer size needed for any call to :c:func:`PyUCS4_ToLower`,
+   :c:func:`PyUCS4_ToUpper`, :c:func:`PyUCS4_ToTitle`, or
+   :c:func:`PyUCS4_ToFolded`. That is, ``3`` for Unicode 16.0.
+
+   .. versionadded:: next
+
+
 These APIs can be used to work with surrogates:
 
 .. c:function:: int Py_UNICODE_IS_SURROGATE(Py_UCS4 ch)

diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
@@ -733,6 +733,31 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
     Py_UCS4 ch       /* Unicode character */
     );
 
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToLower(
+    Py_UCS4 ch,     /* Unicode character */
+    Py_UCS4 *res,   /* Output buffer */
+    Py_ssize_t size        /* Buffer size */
+    );
+
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToUpper(
+    Py_UCS4 ch,     /* Unicode character */
+    Py_UCS4 *res,   /* Output buffer */
+    Py_ssize_t size        /* Buffer size */
+    );
+
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToTitle(
+    Py_UCS4 ch,     /* Unicode character */
+    Py_UCS4 *res,   /* Output buffer */
+    Py_ssize_t size        /* Buffer size */
+    );
+
+PyAPI_FUNC(Py_ssize_t) PyUCS4_ToFolded(
+    Py_UCS4 ch,     /* Unicode character */
+    Py_UCS4 *res,   /* Output buffer */
+    Py_ssize_t size        /* Buffer size */
+    );
+
+
 // Helper array used by Py_UNICODE_ISSPACE().
 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
 
@@ -767,6 +792,8 @@ static inline int Py_UNICODE_ISSPACE(Py_UCS4 ch) {
 
 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
 
+#define PyUCS4_CASE_CONVERSION_BUFFER_SIZE 3
+
 static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) {
    return (Py_UNICODE_ISALPHA(ch)
            || Py_UNICODE_ISDECIMAL(ch)

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
@@ -15,10 +15,6 @@ extern "C" {
 
 extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
 extern int _PyUnicode_IsXidContinue(Py_UCS4 ch);
-extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
-extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
 extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
 extern int _PyUnicode_IsCased(Py_UCS4 ch);
 

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
@@ -1753,6 +1753,65 @@ def test_GET_CACHED_HASH(self):
         # impl detail: ASCII string hashes are equal to bytes ones
         self.assertEqual(unicode_GET_CACHED_HASH(obj), hash(content_bytes))
 
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_tolower(self):
+        import string
+        from _testcapi import unicode_tolower
+
+        for i, c in enumerate(string.ascii_uppercase):
+            with self.subTest(c):
+                self.assertEqual(unicode_tolower(c), string.ascii_lowercase[i])
+
+        # Test unicode character
+        self.assertEqual(unicode_tolower("Č"), "č")
+        self.assertEqual(unicode_tolower("Σ"), "σ")
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_toupper(self):
+        import string
+        from _testcapi import unicode_toupper, unicode_toupper_buffer_too_small
+
+        for i, c in enumerate(string.ascii_lowercase):
+            with self.subTest(c):
+                self.assertEqual(unicode_toupper(c), string.ascii_uppercase[i])
+
+        # Test unicode character
+        self.assertEqual(unicode_toupper("č"), "Č")
+        self.assertEqual(unicode_toupper("ß"), "SS")
+        self.assertEqual(unicode_toupper("ΐ"), "Ϊ́")
+
+        # Test unicode character with smaller buffer
+        with self.assertRaisesRegex(ValueError, "output buffer is too small"):
+            unicode_toupper_buffer_too_small("ß")
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_totitle(self):
+        from _testcapi import unicode_totitle
+
+        self.assertEqual(unicode_totitle("t"), "T")
+
+        # Test unicode character
+        self.assertEqual(unicode_totitle("ł"), "Ł")
+        self.assertEqual(unicode_totitle("ß"), "Ss")
+        self.assertEqual(unicode_totitle("ΐ"), "Ϊ́")
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_tofolded(self):
+        from _testcapi import unicode_tofolded
+
+        self.assertEqual(unicode_tofolded("T"), "t")
+
+        # Test unicode character
+        self.assertEqual(unicode_tofolded("Ł"), "ł")
+        self.assertEqual(unicode_tofolded("Σ"), "σ")
+
+        # Test a character without case mappings (returned unchanged)
+        self.assertEqual(unicode_tofolded("👍"), "👍")
+
 
 class PyUnicodeWriterTest(unittest.TestCase):
     def create_writer(self, size):

diff --git a/Misc/NEWS.d/next/C_API/2025-07-01-14-56-41.gh-issue-76535.9cwObj.rst b/Misc/NEWS.d/next/C_API/2025-07-01-14-56-41.gh-issue-76535.9cwObj.rst
@@ -0,0 +1 @@
+Make :c:func:`PyUCS4_ToLower`, :c:func:`PyUCS4_ToUpper`, :c:func:`PyUCS4_ToTitle` and :c:func:`PyUCS4_ToFolded` public.
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
@@ -220,6 +220,72 @@ unicode_copycharacters(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", to_copy, copied);
 }
 
+static PyObject *
+unicode_case_operation(PyObject *str, Py_ssize_t (*function)(Py_UCS4, Py_UCS4 *, Py_ssize_t),
+                       Py_UCS4 *buf, Py_ssize_t size)
+{
+    if (!PyUnicode_Check(str)) {
+        PyErr_Format(PyExc_TypeError, "expect str type, got %T", str);
+        return NULL;
+    }
+
+    if (PyUnicode_GET_LENGTH(str) != 1) {
+        PyErr_SetString(PyExc_ValueError, "expecting 1-character strings only");
+        return NULL;
+    }
+
+    Py_UCS4 c = PyUnicode_READ_CHAR(str, 0);
+
+    Py_ssize_t chars = function(c, buf, size);
+    if (chars < 0) {
+        return NULL;
+    }
+
+    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, chars);
+}
+
+/* Test PyUCS4_ToLower() */
+static PyObject *
+unicode_tolower(PyObject *self, PyObject *arg)
+{
+    Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
+    return unicode_case_operation(arg, PyUCS4_ToLower, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
+}
+
+
+/* Test PyUCS4_ToUpper() */
+static PyObject *
+unicode_toupper(PyObject *self, PyObject *arg)
+{
+    Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
+    return unicode_case_operation(arg, PyUCS4_ToUpper, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
+}
+
+/* Test PyUCS4_ToUpper() with a small buffer */
+static PyObject *
+unicode_toupper_buffer_too_small(PyObject *self, PyObject *arg)
+{
+    Py_UCS4 buf;
+    return unicode_case_operation(arg, PyUCS4_ToUpper, &buf, 1);
+}
+
+/* Test PyUCS4_ToTitle() */
+static PyObject *
+unicode_totitle(PyObject *self, PyObject *arg)
+{
+    Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
+    return unicode_case_operation(arg, PyUCS4_ToTitle, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
+}
+
+/* Test PyUCS4_ToFolded() */
+static PyObject *
+unicode_tofolded(PyObject *self, PyObject *arg)
+{
+    Py_UCS4 buf[PyUCS4_CASE_CONVERSION_BUFFER_SIZE];
+    return unicode_case_operation(arg, PyUCS4_ToFolded, buf, PyUCS4_CASE_CONVERSION_BUFFER_SIZE);
+}
+
+
 static PyObject*
 unicode_GET_CACHED_HASH(PyObject *self, PyObject *arg)
 {
@@ -577,6 +643,11 @@ static PyMethodDef TestMethods[] = {
     {"unicode_asutf8",           unicode_asutf8,                 METH_VARARGS},
     {"unicode_copycharacters",   unicode_copycharacters,         METH_VARARGS},
     {"unicode_GET_CACHED_HASH",  unicode_GET_CACHED_HASH,        METH_O},
+    {"unicode_tolower",          unicode_tolower,                METH_O},
+    {"unicode_toupper",          unicode_toupper,                METH_O},
+    {"unicode_toupper_buffer_too_small",    unicode_toupper_buffer_too_small,   METH_O},
+    {"unicode_totitle",          unicode_totitle,                METH_O},
+    {"unicode_tofolded",         unicode_tofolded,               METH_O},
     {NULL},
 };
 

diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
@@ -198,67 +198,103 @@ Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
     return ch + ctype->lower;
 }
 
-int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
+Py_ssize_t PyUCS4_ToLower(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     if (ctype->flags & EXTENDED_CASE_MASK) {
         int index = ctype->lower & 0xFFFF;
         int n = ctype->lower >> 24;
+        if (n > size) {
+            PyErr_SetString(PyExc_ValueError, "output buffer is too small");
+            return -1;
+        }
+
         int i;
         for (i = 0; i < n; i++)
             res[i] = _PyUnicode_ExtendedCase[index + i];
         return n;
     }
+
+    if (size < 1) {
+        PyErr_SetString(PyExc_ValueError, "output buffer is too small");
+        return -1;
+    }
     res[0] = ch + ctype->lower;
     return 1;
 }
 
-int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res)
+Py_ssize_t PyUCS4_ToTitle(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     if (ctype->flags & EXTENDED_CASE_MASK) {
         int index = ctype->title & 0xFFFF;
         int n = ctype->title >> 24;
+        if (n > size) {
+            PyErr_SetString(PyExc_ValueError, "output buffer is too small");
+            return -1;
+        }
+
         int i;
         for (i = 0; i < n; i++)
             res[i] = _PyUnicode_ExtendedCase[index + i];
         return n;
     }
+
+    if (size < 1) {
+        PyErr_SetString(PyExc_ValueError, "output buffer is too small");
+        return -1;
+    }
     res[0] = ch + ctype->title;
     return 1;
 }
 
-int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
+Py_ssize_t PyUCS4_ToUpper(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     if (ctype->flags & EXTENDED_CASE_MASK) {
         int index = ctype->upper & 0xFFFF;
         int n = ctype->upper >> 24;
+        if (n > size) {
+            PyErr_SetString(PyExc_ValueError, "output buffer is too small");
+            return -1;
+        }
+
         int i;
         for (i = 0; i < n; i++)
             res[i] = _PyUnicode_ExtendedCase[index + i];
         return n;
     }
+
+    if (size < 1) {
+        PyErr_SetString(PyExc_ValueError, "output buffer is too small");
+        return -1;
+    }
     res[0] = ch + ctype->upper;
     return 1;
 }
 
-int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res)
+Py_ssize_t PyUCS4_ToFolded(Py_UCS4 ch, Py_UCS4 *res, Py_ssize_t size)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     if (ctype->flags & EXTENDED_CASE_MASK && (ctype->lower >> 20) & 7) {
         int index = (ctype->lower & 0xFFFF) + (ctype->lower >> 24);
         int n = (ctype->lower >> 20) & 7;
+        if (n > size) {
+            PyErr_SetString(PyExc_ValueError, "output buffer is too small");
+            return -1;
+        }
+
         int i;
         for (i = 0; i < n; i++)
             res[i] = _PyUnicode_ExtendedCase[index + i];
         return n;
     }
-    return _PyUnicode_ToLowerFull(ch, res);
+
+    return PyUCS4_ToLower(ch, res, size);
 }
 
 int _PyUnicode_IsCased(Py_UCS4 ch)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Make :c:func:`PyUCS4_ToLower`, :c:func:`PyUCS4_ToUpper`, :c:func:`PyUCS4_ToTitle` and :c:func:`PyUCS4_ToFolded` public.