Skip to content

Commit 169d423

Browse files
committed
gh-119182: Add PyUnicodeWriter_WriteUCS4() function
1 parent 913a956 commit 169d423

File tree

7 files changed

+106
-0
lines changed

7 files changed

+106
-0
lines changed

Doc/c-api/unicode.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,6 +1563,16 @@ object.
15631563
On success, return ``0``.
15641564
On error, set an exception, leave the writer unchanged, and return ``-1``.
15651565
1566+
.. c:function:: int PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *writer, Py_UCS4 *str, Py_ssize_t size)
1567+
1568+
Write the UCS4 string *str* into *writer*.
1569+
1570+
*size* is the number of UCS4 characters to write. If *size* is equal to ``-1``,
1571+
*str* must be NUL-terminated and its length is found by searching for the NUL character.
1572+
1573+
On success, return ``0``.
1574+
On error, set an exception, leave the writer unchanged, and return ``-1``.
1575+
15661576
.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
15671577
15681578
Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.

Doc/whatsnew/3.14.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ New Features
291291
* :c:func:`PyUnicodeWriter_Finish`.
292292
* :c:func:`PyUnicodeWriter_WriteChar`.
293293
* :c:func:`PyUnicodeWriter_WriteUTF8`.
294+
* :c:func:`PyUnicodeWriter_WriteUCS4`.
294295
* :c:func:`PyUnicodeWriter_WriteWideChar`.
295296
* :c:func:`PyUnicodeWriter_WriteStr`.
296297
* :c:func:`PyUnicodeWriter_WriteRepr`.

Include/cpython/unicodeobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
463463
PyUnicodeWriter *writer,
464464
const wchar_t *str,
465465
Py_ssize_t size);
466+
PyAPI_FUNC(int) PyUnicodeWriter_WriteUCS4(
467+
PyUnicodeWriter *writer,
468+
Py_UCS4 *str,
469+
Py_ssize_t size);
466470

467471
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
468472
PyUnicodeWriter *writer,

Lib/test/test_capi/test_unicode.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,6 +1787,19 @@ def test_widechar(self):
17871787
writer.write_char('.')
17881788
self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
17891789

1790+
def test_ucs4(self):
    """Exercise PyUnicodeWriter_WriteUCS4() through the test writer.

    Covers input whose widest character needs 1, 2 and 4 bytes, plus the
    explicit-size and zero-length paths in addition to size=-1.
    """
    writer = self.create_writer(0)

    # size=-1: the whole NUL-terminated UCS4 copy of the string is written.
    writer.write_ucs4("ascii", -1)
    writer.write_char("-")
    writer.write_ucs4("latin1=\xe9", -1)
    writer.write_char("-")
    writer.write_ucs4("euro=\u20ac", -1)
    writer.write_char("-")
    writer.write_ucs4("max=\U0010ffff", -1)
    writer.write_char("-")

    # Explicit size: only the first `size` characters must be written.
    writer.write_ucs4("size IGNORED", 4)

    # Zero-length writes must succeed and leave the output untouched.
    writer.write_ucs4("", -1)
    writer.write_ucs4("IGNORED", 0)

    writer.write_char(".")
    self.assertEqual(writer.finish(),
                     "ascii-latin1=\xE9-euro=\u20AC-max=\U0010ffff-size.")
1802+
17901803

17911804
@unittest.skipIf(ctypes is None, 'need ctypes')
17921805
class PyUnicodeWriterFormatTest(unittest.TestCase):

Misc/NEWS.d/next/C API/2024-06-07-22-12-30.gh-issue-119182.yt8Ar7.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@ Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str` object:
55
* :c:func:`PyUnicodeWriter_Finish`.
66
* :c:func:`PyUnicodeWriter_WriteChar`.
77
* :c:func:`PyUnicodeWriter_WriteUTF8`.
8+
* :c:func:`PyUnicodeWriter_WriteUCS4`.
9+
* :c:func:`PyUnicodeWriter_WriteWideChar`.
810
* :c:func:`PyUnicodeWriter_WriteStr`.
911
* :c:func:`PyUnicodeWriter_WriteRepr`.
1012
* :c:func:`PyUnicodeWriter_WriteSubstring`.
1113
* :c:func:`PyUnicodeWriter_Format`.
14+
* :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
1215

1316
Patch by Victor Stinner.

Modules/_testcapi/unicode.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,36 @@ writer_write_widechar(PyObject *self_raw, PyObject *args)
360360
}
361361

362362

363+
/* Test helper: writer.write_ucs4(str, size).
 *
 * Converts the Python string to a temporary UCS4 buffer and forwards it to
 * PyUnicodeWriter_WriteUCS4().  Returns None on success, or NULL with an
 * exception set if any step fails.
 */
static PyObject*
writer_write_ucs4(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    PyObject *text;
    Py_ssize_t count;
    Py_UCS4 *buffer;
    int status;

    if (writer_check(self) < 0) {
        return NULL;
    }
    if (!PyArg_ParseTuple(args, "Un", &text, &count)) {
        return NULL;
    }

    /* Clamp the requested count to the string length; a negative count is
       passed through unchanged to the function under test. */
    Py_ssize_t text_len = PyUnicode_GET_LENGTH(text);
    if (count > text_len) {
        count = text_len;
    }

    buffer = PyUnicode_AsUCS4Copy(text);
    if (buffer == NULL) {
        return NULL;
    }

    /* The temporary copy is released whether or not the write succeeded. */
    status = PyUnicodeWriter_WriteUCS4(self->writer, buffer, count);
    PyMem_Free(buffer);

    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
391+
392+
363393
static PyObject*
364394
writer_write_str(PyObject *self_raw, PyObject *args)
365395
{
@@ -484,6 +514,7 @@ static PyMethodDef writer_methods[] = {
484514
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
485515
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
486516
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
517+
{"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
487518
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
488519
{"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS},
489520
{"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS},

Objects/unicodeobject.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2289,6 +2289,50 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
22892289
return res;
22902290
}
22912291

2292+
2293+
int
2294+
PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
2295+
Py_UCS4 *str,
2296+
Py_ssize_t size)
2297+
{
2298+
_PyUnicodeWriter *writer = (_PyUnicodeWriter*)pub_writer;
2299+
2300+
if (size < 0) {
2301+
size = 0;
2302+
for (; str[size] != '\0'; size++);
2303+
}
2304+
2305+
if (size == 0) {
2306+
return 0;
2307+
}
2308+
2309+
Py_UCS4 max_char = ucs4lib_find_max_char(str, str + size);
2310+
2311+
if (_PyUnicodeWriter_Prepare(writer, size, max_char) < 0) {
2312+
return -1;
2313+
}
2314+
2315+
int kind = writer->kind;
2316+
void *data = (Py_UCS1*)writer->data + writer->pos * kind;
2317+
if (kind == PyUnicode_1BYTE_KIND) {
2318+
_PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1,
2319+
str, str + size,
2320+
data);
2321+
}
2322+
else if (kind == PyUnicode_2BYTE_KIND) {
2323+
_PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2,
2324+
str, str + size,
2325+
data);
2326+
}
2327+
else {
2328+
memcpy(data, str, size * sizeof(Py_UCS4));
2329+
}
2330+
writer->pos += size;
2331+
2332+
return 0;
2333+
}
2334+
2335+
22922336
PyObject*
22932337
PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
22942338
{

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy