From f1cffbb9a99b95f3753cd948389ece2968b09514 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Thu, 23 May 2024 00:29:36 +0200
Subject: [PATCH] WIP: Add PyUnicodeWriter API

TODO: update API doc.
---
 docs/changelog.rst                  |  13 +++
 pythoncapi_compat.h                 | 180 ++++++++++++++++++++++++++++
 tests/test_pythoncapi_compat_cext.c | 154 ++++++++++++++++++++++++++
 3 files changed, 347 insertions(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 4d0fb9d..1e7ba2a 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,19 @@
 Changelog
 =========

+* 2024-07-18: Add functions:
+
+  * ``PyUnicodeWriter_Create()``
+  * ``PyUnicodeWriter_Discard()``
+  * ``PyUnicodeWriter_Finish()``
+  * ``PyUnicodeWriter_WriteChar()``
+  * ``PyUnicodeWriter_WriteUTF8()``
+  * ``PyUnicodeWriter_WriteStr()``
+  * ``PyUnicodeWriter_WriteRepr()``
+  * ``PyUnicodeWriter_WriteSubstring()``
+  * ``PyUnicodeWriter_WriteWideChar()``
+  * ``PyUnicodeWriter_Format()``
+
 * 2024-06-03: Add ``PyLong_GetSign()``.
 * 2024-04-23: Drop Python 3.5 support. It cannot be tested anymore (pip fails).
 * 2024-04-02: Add ``PyDict_SetDefaultRef()`` function.
diff --git a/pythoncapi_compat.h b/pythoncapi_compat.h
index 51e8c0d..d45828f 100644
--- a/pythoncapi_compat.h
+++ b/pythoncapi_compat.h
@@ -1338,6 +1338,186 @@ PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value,
 }
 #endif

+// PyUnicodeWriter API for Python 3.6 to 3.13 (not PyPy), implemented on top
+// of the private _PyUnicodeWriter API.
+#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
+typedef struct PyUnicodeWriter PyUnicodeWriter;
+
+// Release the internal buffer and free the writer.
+// Do nothing if writer is NULL.
+static inline void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
+{
+    if (writer == _Py_NULL) {
+        return;
+    }
+    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
+    PyMem_Free(writer);
+}
+
+// Allocate a writer and prepare its buffer for 'length' characters.
+// Return NULL with an exception set if length is negative or on failure.
+static inline PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length)
+{
+    if (length < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "length must be positive");
+        return _Py_NULL;
+    }
+
+    const size_t size = sizeof(_PyUnicodeWriter);
+    PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
+    if (pub_writer == _Py_NULL) {
+        PyErr_NoMemory();
+        return _Py_NULL;
+    }
+    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
+
+    _PyUnicodeWriter_Init(writer);
+    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
+        PyUnicodeWriter_Discard(pub_writer);
+        return _Py_NULL;
+    }
+    // Set only after the initial Prepare() call above.
+    writer->overallocate = 1;
+    return pub_writer;
+}
+
+// Return the result of _PyUnicodeWriter_Finish() and free the writer.
+// The writer is freed even if no string could be built.
+static inline PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
+{
+    PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
+    assert(((_PyUnicodeWriter*)writer)->buffer == _Py_NULL);
+    PyMem_Free(writer);
+    return str;
+}
+
+// Write a single character.
+// Return -1 with ValueError set if ch is greater than 0x10ffff.
+static inline int
+PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    if (ch > 0x10ffff) {
+        PyErr_SetString(PyExc_ValueError,
+                        "character must be in range(0x110000)");
+        return -1;
+    }
+
+    return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
+}
+
+// Write PyObject_Str(obj). Return -1 if the conversion fails.
+static inline int
+PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
+{
+    PyObject *str = PyObject_Str(obj);
+    if (str == _Py_NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
+    Py_DECREF(str);
+    return res;
+}
+
+// Write PyObject_Repr(obj). Return -1 if the conversion fails.
+static inline int
+PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
+{
+    PyObject *str = PyObject_Repr(obj);
+    if (str == _Py_NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
+    Py_DECREF(str);
+    return res;
+}
+
+// Write the string built by PyUnicode_FromStringAndSize(str, size).
+// If size is negative, strlen(str) is used as the size.
+static inline int
+PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
+                          const char *str, Py_ssize_t size)
+{
+    if (size < 0) {
+        size = (Py_ssize_t)strlen(str);
+    }
+
+    PyObject *str_obj = PyUnicode_FromStringAndSize(str, size);
+    if (str_obj == _Py_NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
+    Py_DECREF(str_obj);
+    return res;
+}
+
+// Write the string built by PyUnicode_FromWideChar(str, size).
+// If size is negative, wcslen(str) is used as the size.
+static inline int
+PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer,
+                              const wchar_t *str, Py_ssize_t size)
+{
+    if (size < 0) {
+        size = (Py_ssize_t)wcslen(str);
+    }
+
+    PyObject *str_obj = PyUnicode_FromWideChar(str, size);
+    if (str_obj == _Py_NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
+    Py_DECREF(str_obj);
+    return res;
+}
+
+// Write the substring of str delimited by start and end.
+// Return -1 with an exception set unless str is a str object and
+// 0 <= start <= end <= len(str).
+static inline int
+PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
+                               Py_ssize_t start, Py_ssize_t end)
+{
+    if (!PyUnicode_Check(str)) {
+        // "%T" is not available on all Python versions covered here:
+        // format the type name with "%s" instead.
+        PyErr_Format(PyExc_TypeError, "expect str, not %s",
+                     Py_TYPE(str)->tp_name);
+        return -1;
+    }
+    if (start < 0 || start > end) {
+        PyErr_Format(PyExc_ValueError, "invalid start argument");
+        return -1;
+    }
+    if (end > PyUnicode_GET_LENGTH(str)) {
+        PyErr_Format(PyExc_ValueError, "invalid end argument");
+        return -1;
+    }
+
+    return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
+                                           start, end);
+}
+
+// Format with PyUnicode_FromFormatV() and write the resulting string.
+static inline int
+PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
+{
+    va_list vargs;
+    va_start(vargs, format);
+    PyObject *str = PyUnicode_FromFormatV(format, vargs);
+    va_end(vargs);
+    if (str == _Py_NULL) {
+        return -1;
+    }
+
+    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
+    Py_DECREF(str);
+    return res;
+}
+#endif // PY_VERSION_HEX < 0x030E0000

 // gh-116560 added PyLong_GetSign() to Python 3.14.0a0
 #if PY_VERSION_HEX < 0x030E00A0
diff --git a/tests/test_pythoncapi_compat_cext.c b/tests/test_pythoncapi_compat_cext.c
index aa7b206..f813548 100644
--- a/tests/test_pythoncapi_compat_cext.c
+++ b/tests/test_pythoncapi_compat_cext.c
@@ -1733,6 +1733,155 @@ test_get_constant(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
 }


+#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
+#define TEST_UNICODEWRITER 1
+
+// Test WriteStr(), WriteChar(), WriteSubstring(), WriteUTF8(), WriteRepr().
+static PyObject *
+test_unicodewriter(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
+{
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+    int ret;
+
+    // test PyUnicodeWriter_WriteStr()
+    PyObject *str = PyUnicode_FromString("var");
+    if (str == NULL) {
+        goto error;
+    }
+    ret = PyUnicodeWriter_WriteStr(writer, str);
+    Py_CLEAR(str);
+    if (ret < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteChar()
+    if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteSubstring()
+    str = PyUnicode_FromString("[long]");
+    if (str == NULL) {
+        goto error;
+    }
+    ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
+    Py_CLEAR(str);
+    if (ret < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteUTF8()
+    if (PyUnicodeWriter_WriteUTF8(writer, " valu\xC3\xA9", -1) < 0) {
+        goto error;
+    }
+    if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteRepr()
+    str = PyUnicode_FromString("repr");
+    if (str == NULL) {
+        goto error;
+    }
+    // Release str before checking the result, so it is not leaked on error.
+    ret = PyUnicodeWriter_WriteRepr(writer, str);
+    Py_CLEAR(str);
+    if (ret < 0) {
+        goto error;
+    }
+
+    {
+        // On failure, PyUnicodeWriter_Finish() has already freed the writer.
+        PyObject *result = PyUnicodeWriter_Finish(writer);
+        if (result == NULL) {
+            return NULL;
+        }
+        assert(PyUnicode_EqualToUTF8(result, "var=long valu\xC3\xA9 'repr'"));
+        Py_DECREF(result);
+    }
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+// Test PyUnicodeWriter_WriteWideChar() with a non-ASCII wide string.
+static PyObject *
+test_unicodewriter_widechar(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
+{
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+
+    // test PyUnicodeWriter_WriteWideChar()
+    int ret = PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1);
+    if (ret < 0) {
+        goto error;
+    }
+
+    {
+        // On failure, PyUnicodeWriter_Finish() has already freed the writer.
+        PyObject *result = PyUnicodeWriter_Finish(writer);
+        if (result == NULL) {
+            return NULL;
+        }
+        assert(PyUnicode_EqualToUTF8(result, "euro=\xe2\x82\xac"));
+        Py_DECREF(result);
+    }
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+
+
+// Test PyUnicodeWriter_Format() followed by PyUnicodeWriter_WriteChar().
+static PyObject *
+test_unicodewriter_format(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
+{
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
+
+    // test PyUnicodeWriter_Format()
+    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0) {
+        goto error;
+    }
+
+    // test PyUnicodeWriter_WriteChar()
+    if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
+        goto error;
+    }
+
+    {
+        // On failure, PyUnicodeWriter_Finish() has already freed the writer.
+        PyObject *result = PyUnicodeWriter_Finish(writer);
+        if (result == NULL) {
+            return NULL;
+        }
+        assert(PyUnicode_EqualToUTF8(result, "Hello 123."));
+        Py_DECREF(result);
+    }
+
+    Py_RETURN_NONE;
+
+error:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+}
+#endif
+
+
 static struct PyMethodDef methods[] = {
     {"test_object", test_object, METH_NOARGS, _Py_NULL},
     {"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
@@ -1771,6 +1920,11 @@ static struct PyMethodDef methods[] = {
     {"test_time", test_time, METH_NOARGS, _Py_NULL},
 #endif
     {"test_get_constant", test_get_constant, METH_NOARGS, _Py_NULL},
+#ifdef TEST_UNICODEWRITER
+    {"test_unicodewriter", test_unicodewriter, METH_NOARGS, _Py_NULL},
+    {"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS, _Py_NULL},
+    {"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS, _Py_NULL},
+#endif
     {_Py_NULL, _Py_NULL, 0, _Py_NULL}
 };

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy