Skip to content

Commit f1cffbb

Browse files
committed
WIP: Add PyUnicodeWriter API
TODO: update API doc.
1 parent ea1f7f6 commit f1cffbb

File tree

3 files changed

+312
-0
lines changed

3 files changed

+312
-0
lines changed

docs/changelog.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
Changelog
22
=========
33

4+
* 2024-07-18: Add functions:
5+
6+
* ``PyUnicodeWriter_Create()``
7+
* ``PyUnicodeWriter_Discard()``
8+
* ``PyUnicodeWriter_Finish()``
9+
* ``PyUnicodeWriter_WriteChar()``
10+
* ``PyUnicodeWriter_WriteUTF8()``
11+
* ``PyUnicodeWriter_WriteStr()``
12+
* ``PyUnicodeWriter_WriteRepr()``
13+
* ``PyUnicodeWriter_WriteSubstring()``
14+
* ``PyUnicodeWriter_WriteWideChar()``
15+
* ``PyUnicodeWriter_Format()``
16+
417
* 2024-06-03: Add ``PyLong_GetSign()``.
518
* 2024-04-23: Drop Python 3.5 support. It cannot be tested anymore (pip fails).
619
* 2024-04-02: Add ``PyDict_SetDefaultRef()`` function.

pythoncapi_compat.h

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,159 @@ PyDict_SetDefaultRef(PyObject *d, PyObject *key, PyObject *default_value,
13381338
}
13391339
#endif
13401340

1341+
#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
1342+
typedef struct PyUnicodeWriter PyUnicodeWriter;
1343+
1344+
// Destroy a writer without producing a string.
//
// The writer's internal state is released with _PyUnicodeWriter_Dealloc()
// and the heap block obtained in PyUnicodeWriter_Create() is freed.
// Passing NULL is accepted and does nothing, matching the documented
// behavior of the Python 3.14 function this backports; without the guard
// a NULL writer would be handed to _PyUnicodeWriter_Dealloc().
static inline void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{
    if (writer == _Py_NULL) {
        return;
    }
    _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
    PyMem_Free(writer);
}
1349+
1350+
// Allocate and initialize a new writer.
//
// `length` is forwarded to _PyUnicodeWriter_Prepare() as the initial size
// request; a negative value is rejected with ValueError.
//
// Returns the new writer, or NULL with an exception set (ValueError for a
// negative length, MemoryError if the allocation fails, or whatever
// _PyUnicodeWriter_Prepare() raised). The caller must eventually pass the
// writer to PyUnicodeWriter_Finish() or PyUnicodeWriter_Discard().
//
// _Py_NULL is used throughout (instead of a mix of NULL and _Py_NULL) to
// stay consistent with the rest of this header.
static inline PyUnicodeWriter* PyUnicodeWriter_Create(Py_ssize_t length)
{
    if (length < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "length must be positive");
        return _Py_NULL;
    }

    // The public handle is an opaque pointer to a heap-allocated
    // private _PyUnicodeWriter structure.
    const size_t size = sizeof(_PyUnicodeWriter);
    PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
    if (pub_writer == _Py_NULL) {
        PyErr_NoMemory();
        return _Py_NULL;
    }
    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;

    _PyUnicodeWriter_Init(writer);
    // NOTE(review): 127 is presumably the "maxchar" hint (ASCII) of the
    // private API -- confirm against the CPython headers.
    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
        PyUnicodeWriter_Discard(pub_writer);
        return _Py_NULL;
    }
    // Overallocation is switched on only after the initial prepare call.
    writer->overallocate = 1;
    return pub_writer;
}
1374+
1375+
// Turn the writer's content into a string object and free the writer.
//
// Returns whatever _PyUnicodeWriter_Finish() returns. The writer memory is
// released in every case, so the handle must not be used afterwards.
static inline PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
{
    _PyUnicodeWriter *impl = (_PyUnicodeWriter*)writer;
    PyObject *result = _PyUnicodeWriter_Finish(impl);

    // The private finish call is expected to leave no buffer behind.
    assert(impl->buffer == NULL);

    PyMem_Free(writer);
    return result;
}
1382+
1383+
static inline int
1384+
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
1385+
{
1386+
if (ch > 0x10ffff) {
1387+
PyErr_SetString(PyExc_ValueError,
1388+
"character must be in range(0x110000)");
1389+
return -1;
1390+
}
1391+
1392+
return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
1393+
}
1394+
1395+
int
1396+
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
1397+
{
1398+
PyObject *str = PyObject_Str(obj);
1399+
if (str == NULL) {
1400+
return -1;
1401+
}
1402+
1403+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
1404+
Py_DECREF(str);
1405+
return res;
1406+
}
1407+
1408+
int
1409+
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
1410+
{
1411+
PyObject *str = PyObject_Repr(obj);
1412+
if (str == NULL) {
1413+
return -1;
1414+
}
1415+
1416+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
1417+
Py_DECREF(str);
1418+
return res;
1419+
}
1420+
1421+
static inline int
1422+
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
1423+
const char *str, Py_ssize_t size)
1424+
{
1425+
if (size < 0) {
1426+
size = (Py_ssize_t)strlen(str);
1427+
}
1428+
1429+
PyObject *str_obj = PyUnicode_FromStringAndSize(str, size);
1430+
if (str_obj == _Py_NULL) {
1431+
return -1;
1432+
}
1433+
1434+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
1435+
Py_DECREF(str_obj);
1436+
return res;
1437+
}
1438+
1439+
static inline int
1440+
PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer,
1441+
const wchar_t *str, Py_ssize_t size)
1442+
{
1443+
if (size < 0) {
1444+
size = (Py_ssize_t)wcslen(str);
1445+
}
1446+
1447+
PyObject *str_obj = PyUnicode_FromWideChar(str, size);
1448+
if (str_obj == _Py_NULL) {
1449+
return -1;
1450+
}
1451+
1452+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str_obj);
1453+
Py_DECREF(str_obj);
1454+
return res;
1455+
}
1456+
1457+
static inline int
1458+
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
1459+
Py_ssize_t start, Py_ssize_t end)
1460+
{
1461+
if (!PyUnicode_Check(str)) {
1462+
PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
1463+
return -1;
1464+
}
1465+
if (start < 0 || start > end) {
1466+
PyErr_Format(PyExc_ValueError, "invalid start argument");
1467+
return -1;
1468+
}
1469+
if (end > PyUnicode_GET_LENGTH(str)) {
1470+
PyErr_Format(PyExc_ValueError, "invalid end argument");
1471+
return -1;
1472+
}
1473+
1474+
return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
1475+
start, end);
1476+
}
1477+
1478+
static inline int
1479+
PyUnicodeWriter_Format(PyUnicodeWriter *writer, const char *format, ...)
1480+
{
1481+
va_list vargs;
1482+
va_start(vargs, format);
1483+
PyObject *str = PyUnicode_FromFormatV(format, vargs);
1484+
va_end(vargs);
1485+
if (str == _Py_NULL) {
1486+
return -1;
1487+
}
1488+
1489+
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
1490+
Py_DECREF(str);
1491+
return res;
1492+
}
1493+
#endif // PY_VERSION_HEX < 0x030E0000
13411494

13421495
// gh-116560 added PyLong_GetSign() to Python 3.14.0a0
13431496
#if PY_VERSION_HEX < 0x030E00A0

tests/test_pythoncapi_compat_cext.c

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,6 +1733,147 @@ test_get_constant(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
17331733
}
17341734

17351735

1736+
#if PY_VERSION_HEX < 0x030E0000 && PY_VERSION_HEX >= 0x03060000 && !defined(PYPY_VERSION)
1737+
#define TEST_UNICODEWRITER 1
1738+
1739+
// Exercise PyUnicodeWriter_WriteStr(), WriteChar(), WriteSubstring(),
// WriteUTF8() and WriteRepr() on a single writer, then check the final
// string produced by PyUnicodeWriter_Finish().
//
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }
    int ret;

    // test PyUnicodeWriter_WriteStr()
    PyObject *str = PyUnicode_FromString("var");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteStr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteChar()
    if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteSubstring()
    str = PyUnicode_FromString("[long]");
    if (str == NULL) {
        goto error;
    }
    ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteUTF8()
    if (PyUnicodeWriter_WriteUTF8(writer, " valu\xC3\xA9", -1) < 0) {
        goto error;
    }
    if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) {
        goto error;
    }

    // test PyUnicodeWriter_WriteRepr()
    str = PyUnicode_FromString("repr");
    if (str == NULL) {
        goto error;
    }
    // Release the temporary before checking the result, so that the
    // error path does not leak it (same pattern as the calls above).
    ret = PyUnicodeWriter_WriteRepr(writer, str);
    Py_CLEAR(str);
    if (ret < 0) {
        goto error;
    }

    {
        // PyUnicodeWriter_Finish() frees the writer itself, so it must not
        // be discarded again if it fails.
        PyObject *result = PyUnicodeWriter_Finish(writer);
        if (result == NULL) {
            return NULL;
        }
        assert(PyUnicode_EqualToUTF8(result, "var=long valu\xC3\xA9 'repr'"));
        Py_DECREF(result);
    }

    Py_RETURN_NONE;

error:
    PyUnicodeWriter_Discard(writer);
    return NULL;
}
1808+
1809+
1810+
// Check that PyUnicodeWriter_WriteWideChar() with an implicit length
// (size -1) produces the expected string.
//
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter_widechar(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // test PyUnicodeWriter_WriteWideChar()
    if (PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1) < 0) {
        // the write failed: the writer is still owned by this function
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // Finish() releases the writer whether or not it succeeds.
    PyObject *text = PyUnicodeWriter_Finish(writer);
    if (text == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text, "euro=\xe2\x82\xac"));
    Py_DECREF(text);

    Py_RETURN_NONE;
}
1839+
1840+
1841+
// Check PyUnicodeWriter_Format() followed by PyUnicodeWriter_WriteChar()
// and compare the finished string with the expected text.
//
// Returns None on success, NULL with an exception set on failure.
static PyObject *
test_unicodewriter_format(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
{
    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
    if (writer == NULL) {
        return NULL;
    }

    // test PyUnicodeWriter_Format(), then PyUnicodeWriter_WriteChar();
    // the second call only runs if the first one succeeded
    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0
        || PyUnicodeWriter_WriteChar(writer, '.') < 0)
    {
        PyUnicodeWriter_Discard(writer);
        return NULL;
    }

    // Finish() releases the writer whether or not it succeeds.
    PyObject *text = PyUnicodeWriter_Finish(writer);
    if (text == NULL) {
        return NULL;
    }
    assert(PyUnicode_EqualToUTF8(text, "Hello 123."));
    Py_DECREF(text);

    Py_RETURN_NONE;
}
1874+
#endif
1875+
1876+
17361877
static struct PyMethodDef methods[] = {
17371878
{"test_object", test_object, METH_NOARGS, _Py_NULL},
17381879
{"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
@@ -1771,6 +1912,11 @@ static struct PyMethodDef methods[] = {
17711912
{"test_time", test_time, METH_NOARGS, _Py_NULL},
17721913
#endif
17731914
{"test_get_constant", test_get_constant, METH_NOARGS, _Py_NULL},
1915+
#ifdef TEST_UNICODEWRITER
1916+
{"test_unicodewriter", test_unicodewriter, METH_NOARGS, _Py_NULL},
1917+
{"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS, _Py_NULL},
1918+
{"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS, _Py_NULL},
1919+
#endif
17741920
{_Py_NULL, _Py_NULL, 0, _Py_NULL}
17751921
};
17761922

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy