From 81621298add2ac6ff0439b641887614664bb740c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 11:41:23 +0200 Subject: [PATCH 1/5] gh-124502: Add PyUnicode_Equal() function --- Doc/c-api/unicode.rst | 18 +++++++++++++ Doc/data/stable_abi.dat | 1 + Doc/whatsnew/3.14.rst | 3 +++ Include/unicodeobject.h | 4 +++ Lib/test/test_capi/test_unicode.py | 27 +++++++++++++++++++ Lib/test/test_stable_abi_ctypes.py | 1 + ...-09-25-11-44-02.gh-issue-124502.qWuDjT.rst | 2 ++ Misc/stable_abi.toml | 2 ++ Modules/_testlimitedcapi/unicode.c | 18 +++++++++++++ Objects/unicodeobject.c | 17 ++++++++++++ PC/python3dll.c | 1 + 11 files changed, 94 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 958fafd47ac81b..3cb9984199bbe5 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1423,6 +1423,24 @@ They all return ``NULL`` or ``-1`` if an exception occurs. This function returns ``-1`` upon failure, so one should call :c:func:`PyErr_Occurred` to check for errors. + See also :c:func:`PyUnicode_Equal`. + + +.. c:function:: int PyUnicode_Equal(PyObject *a, PyObject *b) + + Test if two strings are equal: + + * Return ``1`` if *a* is equal to *b*. + * Return ``0`` if *a* is not equal to *b*. + * Set a :exc:`TypeError` exception and return ``-1`` if *a* or *b* is not a + Python :class:`str` object. + + The function always succeed if *a* and *b* are Python :class:`str` objects. + + See also :c:func:`PyUnicode_Compare`. + + .. versionadded:: 3.14 + .. 
c:function:: int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *string, Py_ssize_t size) diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 19dc71a345b474..9314facd2ad873 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -783,6 +783,7 @@ func,PyUnicode_DecodeUnicodeEscape,3.2,, func,PyUnicode_EncodeCodePage,3.7,on Windows, func,PyUnicode_EncodeFSDefault,3.2,, func,PyUnicode_EncodeLocale,3.7,, +func,PyUnicode_Equal,3.14,, func,PyUnicode_EqualToUTF8,3.13,, func,PyUnicode_EqualToUTF8AndSize,3.13,, func,PyUnicode_FSConverter,3.2,, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5acb9bfe18b2d0..76ad76cdbc75af 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -646,6 +646,9 @@ New Features `__ mentioned in :pep:`630` (:gh:`124153`). +* Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. + (Contributed by Victor Stinner in :gh:`124502`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index dee00715b3c51d..2ce3a008b7129e 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -966,6 +966,10 @@ PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *); PyAPI_FUNC(int) PyUnicode_EqualToUTF8AndSize(PyObject *, const char *, Py_ssize_t); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030e0000 +PyAPI_FUNC(int) PyUnicode_Equal(PyObject *str1, PyObject *str2); +#endif + /* Rich compare two strings and return one of the following: - NULL in case an exception was raised diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index e6f85427214958..6a55c3dc5347dc 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1903,6 +1903,33 @@ def test_recover_error(self): self.assertEqual(writer.finish(), 'Hello World.') + def test_unicode_equal(self): + unicode_equal = _testlimitedcapi.unicode_equal + + def copy(text): + 
return text.encode().decode() + + self.assertTrue(unicode_equal("", "")) + self.assertTrue(unicode_equal("abc", "abc")) + self.assertTrue(unicode_equal("abc", copy("abc"))) + self.assertTrue(unicode_equal("\u20ac", copy("\u20ac"))) + self.assertTrue(unicode_equal("\U0010ffff", copy("\U0010ffff"))) + + self.assertFalse(unicode_equal("abc", "abcd")) + self.assertFalse(unicode_equal("\u20ac", "\u20ad")) + self.assertFalse(unicode_equal("\U0010ffff", "\U0010fffe")) + + # invalid type + for invalid_type in (b'bytes', 123, ("tuple",)): + with self.subTest(invalid_type=invalid_type): + with self.assertRaises(TypeError): + unicode_equal("abc", invalid_type) + with self.assertRaises(TypeError): + unicode_equal(invalid_type, "abc") + + # CRASHES unicode_equal("abc", NULL) + # CRASHES unicode_equal(NULL, "abc") + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index d16ad7ef5d4328..b14d500a9c6e97 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -805,6 +805,7 @@ def test_windows_feature_macros(self): "PyUnicode_DecodeUnicodeEscape", "PyUnicode_EncodeFSDefault", "PyUnicode_EncodeLocale", + "PyUnicode_Equal", "PyUnicode_EqualToUTF8", "PyUnicode_EqualToUTF8AndSize", "PyUnicode_FSConverter", diff --git a/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst new file mode 100644 index 00000000000000..b763a5e1aaba3f --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. +Patch by Victor Stinner. 
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index fe0a5e44f8fb15..62978261745d79 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2536,3 +2536,5 @@ added = '3.14' [const.Py_TP_USE_SPEC] added = '3.14' +[function.PyUnicode_Equal] + added = '3.14' diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 2b70d09108a333..94266da6234a34 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1837,6 +1837,23 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) #undef CHECK_FORMAT_0 } + +/* Test PyUnicode_Equal() */ +static PyObject * +unicode_equal(PyObject *module, PyObject *args) +{ + PyObject *str1, *str2; + if (!PyArg_ParseTuple(args, "OO", &str1, &str2)) { + return NULL; + } + + NULLABLE(str1); + NULLABLE(str2); + RETURN_INT(PyUnicode_Equal(str1, str2)); +} + + + static PyMethodDef TestMethods[] = { {"codec_incrementalencoder", codec_incrementalencoder, METH_VARARGS}, {"codec_incrementaldecoder", codec_incrementaldecoder, METH_VARARGS}, @@ -1924,6 +1941,7 @@ static PyMethodDef TestMethods[] = { {"unicode_format", unicode_format, METH_VARARGS}, {"unicode_contains", unicode_contains, METH_VARARGS}, {"unicode_isidentifier", unicode_isidentifier, METH_O}, + {"unicode_equal", unicode_equal, METH_VARARGS}, {NULL}, }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2494c989544ca0..9d82872306ce1b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11009,6 +11009,23 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) } +int +PyUnicode_Equal(PyObject *str1, PyObject *str2) +{ + if (!PyUnicode_Check(str1)) { + PyErr_Format(PyExc_TypeError, + "first argument must be str, not %T", str1); + return -1; + } + if (!PyUnicode_Check(str2)) { + PyErr_Format(PyExc_TypeError, + "second argument must be str, not %T", str2); + return -1; + } + return _PyUnicode_Equal(str1, str2); +} + + int PyUnicode_Compare(PyObject *left, PyObject 
*right) { diff --git a/PC/python3dll.c b/PC/python3dll.c index 6b8208ab90bd95..9296474617e115 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -717,6 +717,7 @@ EXPORT_FUNC(PyUnicode_DecodeUTF8Stateful) EXPORT_FUNC(PyUnicode_EncodeCodePage) EXPORT_FUNC(PyUnicode_EncodeFSDefault) EXPORT_FUNC(PyUnicode_EncodeLocale) +EXPORT_FUNC(PyUnicode_Equal) EXPORT_FUNC(PyUnicode_EqualToUTF8) EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize) EXPORT_FUNC(PyUnicode_Find) From 4a2ae3d7b9de626d34e1af85c1a858003b2466aa Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 13:22:37 +0200 Subject: [PATCH 2/5] Fix for regular build (not Free Threading) --- Modules/_testlimitedcapi/unicode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 94266da6234a34..c7a23d5d1cbd71 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1,7 +1,7 @@ #include "pyconfig.h" // Py_GIL_DISABLED #ifndef Py_GIL_DISABLED - // Need limited C API 3.13 to test PyUnicode_EqualToUTF8() -# define Py_LIMITED_API 0x030d0000 + // Need limited C API 3.14 to test PyUnicode_Equal() +# define Py_LIMITED_API 0x030e0000 #endif #include "parts.h" From 89531920e11c07ba291521266bfe066a6d655d94 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 15:34:02 +0200 Subject: [PATCH 3/5] Address reviews --- Doc/c-api/unicode.rst | 12 ++++++++---- Doc/whatsnew/3.14.rst | 3 ++- Lib/test/test_capi/test_unicode.py | 6 ++++++ ...24-09-25-11-44-02.gh-issue-124502.qWuDjT.rst | 4 ++-- Objects/unicodeobject.c | 17 +++++++---------- 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 3cb9984199bbe5..3d627180f47432 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1423,7 +1423,9 @@ They all return ``NULL`` or ``-1`` if an exception occurs. 
This function returns ``-1`` upon failure, so one should call :c:func:`PyErr_Occurred` to check for errors. - See also :c:func:`PyUnicode_Equal`. + .. seealso:: + + The :c:func:`PyUnicode_Equal` function. .. c:function:: int PyUnicode_Equal(PyObject *a, PyObject *b) @@ -1433,11 +1435,13 @@ They all return ``NULL`` or ``-1`` if an exception occurs. * Return ``1`` if *a* is equal to *b*. * Return ``0`` if *a* is not equal to *b*. * Set a :exc:`TypeError` exception and return ``-1`` if *a* or *b* is not a - Python :class:`str` object. + :class:`str` object. - The function always succeed if *a* and *b* are Python :class:`str` objects. + The function always succeeds if *a* and *b* are :class:`str` objects. + + .. seealso:: - See also :c:func:`PyUnicode_Compare`. + The :c:func:`PyUnicode_Compare` function. .. versionadded:: 3.14 diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 76ad76cdbc75af..b019d7ea255d30 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -646,7 +646,8 @@ New Features `__ mentioned in :pep:`630` (:gh:`124153`). -* Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. +* Add :c:func:`PyUnicode_Equal` function to the limited C API: + test if two strings are equal. (Contributed by Victor Stinner in :gh:`124502`.) 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 6a55c3dc5347dc..65d8242ad3fc60 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1919,6 +1919,12 @@ def copy(text): self.assertFalse(unicode_equal("\u20ac", "\u20ad")) self.assertFalse(unicode_equal("\U0010ffff", "\U0010fffe")) + # str subclass + self.assertTrue(unicode_equal("abc", Str("abc"))) + self.assertTrue(unicode_equal(Str("abc"), "abc")) + self.assertFalse(unicode_equal("abc", Str("abcd"))) + self.assertFalse(unicode_equal(Str("abc"), "abcd")) + # invalid type for invalid_type in (b'bytes', 123, ("tuple",)): with self.subTest(invalid_type=invalid_type): diff --git a/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst index b763a5e1aaba3f..f515619328b359 100644 --- a/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst +++ b/Misc/NEWS.d/next/C_API/2024-09-25-11-44-02.gh-issue-124502.qWuDjT.rst @@ -1,2 +1,2 @@ -Add :c:func:`PyUnicode_Equal` function to test if two strings are equal. -Patch by Victor Stinner. +Add :c:func:`PyUnicode_Equal` function to the limited C API: test if two +strings are equal. Patch by Victor Stinner. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9d82872306ce1b..9a1c0ddcf7825f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11012,17 +11012,14 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) int PyUnicode_Equal(PyObject *str1, PyObject *str2) { - if (!PyUnicode_Check(str1)) { - PyErr_Format(PyExc_TypeError, - "first argument must be str, not %T", str1); - return -1; - } - if (!PyUnicode_Check(str2)) { - PyErr_Format(PyExc_TypeError, - "second argument must be str, not %T", str2); - return -1; + if (PyUnicode_Check(str1) && PyUnicode_Check(str2)) { + return _PyUnicode_Equal(str1, str2); } - return _PyUnicode_Equal(str1, str2); + + PyErr_Format(PyExc_TypeError, + "Can't compare %T and %T", + str1, str2); + return -1; } From ac057c65832df5fa9c12d4fd375e8e98ffa13e83 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 25 Sep 2024 16:28:24 +0200 Subject: [PATCH 4/5] Change error message --- Objects/unicodeobject.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9a1c0ddcf7825f..dc3d5ba630af18 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11012,14 +11012,18 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) int PyUnicode_Equal(PyObject *str1, PyObject *str2) { - if (PyUnicode_Check(str1) && PyUnicode_Check(str2)) { - return _PyUnicode_Equal(str1, str2); + if (!PyUnicode_Check(str1)) { + PyErr_Format(PyExc_TypeError, + "first argument must be str, not %T", str1); + return -1; + } + if (!PyUnicode_Check(str2)) { + PyErr_Format(PyExc_TypeError, + "second argument must be str, not %T", str2); + return -1; } - PyErr_Format(PyExc_TypeError, - "Can't compare %T and %T", - str1, str2); - return -1; + return _PyUnicode_Equal(str1, str2); } From b5b30bf08247c90e5272c631a883bf459e9a0053 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 1 Oct 2024 17:07:52 +0200 Subject: [PATCH 5/5] Mention that __eq__() is 
not honored --- Doc/c-api/unicode.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 3d627180f47432..cdfca96bcd1fed 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1439,6 +1439,9 @@ They all return ``NULL`` or ``-1`` if an exception occurs. The function always succeeds if *a* and *b* are :class:`str` objects. + The function works for :class:`str` subclasses, but does not honor custom + ``__eq__()`` method. + .. seealso:: The :c:func:`PyUnicode_Compare` function.

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy