Skip to content

Commit 99ab0d3

Browse files
authored
Add PyUnicode_EqualToUTF8() function (python#78)
Add PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize() functions.
1 parent dab5f98 commit 99ab0d3

File tree

4 files changed

+121
-0
lines changed

4 files changed

+121
-0
lines changed

docs/api.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@ Python 3.13
113113
114114
Available on Python 3.5.2 and newer.
115115
116+
.. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
117+
118+
See `PyUnicode_EqualToUTF8() documentation <https://docs.python.org/dev/c-api/unicode.html#c.PyUnicode_EqualToUTF8>`__.
119+
120+
.. c:function:: int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size)
121+
122+
See `PyUnicode_EqualToUTF8AndSize() documentation <https://docs.python.org/dev/c-api/unicode.html#c.PyUnicode_EqualToUTF8AndSize>`__.
123+
116124
117125
Python 3.12
118126
-----------

docs/changelog.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Changelog
22
=========
33

4+
* 2023-10-04: Add functions:
5+
6+
* ``PyUnicode_EqualToUTF8()``
7+
* ``PyUnicode_EqualToUTF8AndSize()``
8+
49
* 2023-10-03: Add functions:
510

611
* ``PyObject_VisitManagedDict()``

pythoncapi_compat.h

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,79 @@ PyThreadState_GetUnchecked(void)
939939
}
940940
#endif
941941

942+
// gh-110289 added PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize()
943+
// to Python 3.13.0a1
944+
#if PY_VERSION_HEX < 0x030D00A1
945+
static inline int
946+
PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t str_len)
947+
{
948+
Py_ssize_t len;
949+
const void *utf8;
950+
PyObject *exc_type, *exc_value, *exc_tb;
951+
int res;
952+
953+
// API cannot report errors so save/restore the exception
954+
PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
955+
956+
// Python 3.3.0a1 added PyUnicode_AsUTF8AndSize()
957+
#if PY_VERSION_HEX >= 0x030300A1
958+
if (PyUnicode_IS_ASCII(unicode)) {
959+
utf8 = PyUnicode_DATA(unicode);
960+
len = PyUnicode_GET_LENGTH(unicode);
961+
}
962+
else {
963+
utf8 = PyUnicode_AsUTF8AndSize(unicode, &len);
964+
if (utf8 == NULL) {
965+
// Memory allocation failure. The API cannot report error,
966+
// so ignore the exception and return 0.
967+
res = 0;
968+
goto done;
969+
}
970+
}
971+
972+
if (len != str_len) {
973+
res = 0;
974+
goto done;
975+
}
976+
res = (memcmp(utf8, str, (size_t)len) == 0);
977+
#else
978+
PyObject *bytes = PyUnicode_AsUTF8String(unicode);
979+
if (bytes == NULL) {
980+
// Memory allocation failure. The API cannot report error,
981+
// so ignore the exception and return 0.
982+
res = 0;
983+
goto done;
984+
}
985+
986+
#if PY_VERSION_HEX >= 0x03000000
987+
len = PyBytes_GET_SIZE(bytes);
988+
utf8 = PyBytes_AS_STRING(bytes);
989+
#else
990+
len = PyString_GET_SIZE(bytes);
991+
utf8 = PyString_AS_STRING(bytes);
992+
#endif
993+
if (len != str_len) {
994+
Py_DECREF(bytes);
995+
res = 0;
996+
goto done;
997+
}
998+
999+
res = (memcmp(utf8, str, (size_t)len) == 0);
1000+
Py_DECREF(bytes);
1001+
#endif
1002+
1003+
done:
1004+
PyErr_Restore(exc_type, exc_value, exc_tb);
1005+
return res;
1006+
}
1007+
1008+
static inline int
1009+
PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
1010+
{
1011+
return PyUnicode_EqualToUTF8AndSize(unicode, str, (Py_ssize_t)strlen(str));
1012+
}
1013+
#endif
1014+
9421015

9431016
#ifdef __cplusplus
9441017
}

tests/test_pythoncapi_compat_cext.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,6 +1361,40 @@ test_managed_dict(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
13611361
#endif // PY_VERSION_HEX >= 0x030B00A3
13621362

13631363

1364+
// Test PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize().
//
// Return None on success, or NULL with an exception set if one of the test
// strings cannot be created.
static PyObject *
test_unicode(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
{
    PyObject *abc = PyUnicode_FromString("abc");
    if (abc == NULL) {
        return NULL;
    }

    // String with an embedded null character: 7 characters in total.
    PyObject *abc0def = PyUnicode_FromStringAndSize("abc\0def", 7);
    if (abc0def == NULL) {
        // Check the object that was just created (not 'abc' again) and
        // release 'abc' so the failure path does not leak a reference.
        Py_DECREF(abc);
        return NULL;
    }

    // PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize() can be called
    // with an exception raised and they must not clear the current exception.
    PyErr_NoMemory();

    assert(PyUnicode_EqualToUTF8AndSize(abc, "abc", 3) == 1);
    assert(PyUnicode_EqualToUTF8AndSize(abc, "Python", 6) == 0);
    // With an explicit size, the embedded null byte is part of the comparison.
    assert(PyUnicode_EqualToUTF8AndSize(abc0def, "abc\0def", 7) == 1);

    assert(PyUnicode_EqualToUTF8(abc, "abc") == 1);
    assert(PyUnicode_EqualToUTF8(abc, "Python") == 0);
    // Without a size, the C string ends at the first null byte ("abc"), which
    // differs from the 7-character Python string.
    assert(PyUnicode_EqualToUTF8(abc0def, "abc\0def") == 0);

    // The MemoryError set above must still be the current exception.
    assert(PyErr_ExceptionMatches(PyExc_MemoryError));
    PyErr_Clear();

    Py_DECREF(abc);
    Py_DECREF(abc0def);
    Py_RETURN_NONE;
}
1396+
1397+
13641398
static struct PyMethodDef methods[] = {
13651399
{"test_object", test_object, METH_NOARGS, _Py_NULL},
13661400
{"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
@@ -1390,6 +1424,7 @@ static struct PyMethodDef methods[] = {
13901424
#ifdef TEST_MANAGED_DICT
13911425
{"test_managed_dict", test_managed_dict, METH_NOARGS, _Py_NULL},
13921426
#endif
1427+
{"test_unicode", test_unicode, METH_NOARGS, _Py_NULL},
13931428
{_Py_NULL, _Py_NULL, 0, _Py_NULL}
13941429
};
13951430

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy