Skip to content

Commit 3d69d18

Browse files
vstinner, ZeroIntensity, picnixz
authored
[3.14] gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973) (#134974)
gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973). Replace most PyUnicodeWriter_WriteUTF8() calls with PyUnicodeWriter_WriteASCII(). (cherry picked from commit f49a07b) Co-authored-by: Peter Bierma <zintensitydev@gmail.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
1 parent dba307a commit 3d69d18

File tree

16 files changed

+98
-31
lines changed

16 files changed

+98
-31
lines changed

Doc/c-api/unicode.rst

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1798,9 +1798,24 @@ object.
17981798
17991799
See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
18001800
1801+
.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
1802+
1803+
Write the ASCII string *str* into *writer*.
1804+
1805+
*size* is the string length in bytes. If *size* is equal to ``-1``, call
1806+
``strlen(str)`` to get the string length.
1807+
1808+
*str* must only contain ASCII characters. The behavior is undefined if
1809+
*str* contains non-ASCII characters.
1810+
1811+
On success, return ``0``.
1812+
On error, set an exception, leave the writer unchanged, and return ``-1``.
1813+
1814+
.. versionadded:: next
1815+
18011816
.. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size)
18021817
1803-
Writer the wide string *str* into *writer*.
1818+
Write the wide string *str* into *writer*.
18041819
18051820
*size* is a number of wide characters. If *size* is equal to ``-1``, call
18061821
``wcslen(str)`` to get the string length.

Doc/whatsnew/3.14.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2584,6 +2584,7 @@ New features
25842584
* :c:func:`PyUnicodeWriter_Discard`
25852585
* :c:func:`PyUnicodeWriter_Finish`
25862586
* :c:func:`PyUnicodeWriter_Format`
2587+
* :c:func:`PyUnicodeWriter_WriteASCII`
25872588
* :c:func:`PyUnicodeWriter_WriteChar`
25882589
* :c:func:`PyUnicodeWriter_WriteRepr`
25892590
* :c:func:`PyUnicodeWriter_WriteStr`
@@ -2860,7 +2861,7 @@ Deprecated
28602861
:c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) <PyUnicodeWriter_WriteSubstring>`.
28612862
* :c:func:`!_PyUnicodeWriter_WriteASCIIString`:
28622863
replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with
2863-
:c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.
2864+
:c:func:`PyUnicodeWriter_WriteASCII(writer, str) <PyUnicodeWriter_WriteASCII>`.
28642865
* :c:func:`!_PyUnicodeWriter_WriteLatin1String`:
28652866
replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with
28662867
:c:func:`PyUnicodeWriter_WriteUTF8(writer, str) <PyUnicodeWriter_WriteUTF8>`.

Include/cpython/unicodeobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
478478
PyUnicodeWriter *writer,
479479
const char *str,
480480
Py_ssize_t size);
481+
PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII(
482+
PyUnicodeWriter *writer,
483+
const char *str,
484+
Py_ssize_t size);
481485
PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
482486
PyUnicodeWriter *writer,
483487
const wchar_t *str,

Lib/test/test_capi/test_unicode.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,6 +1776,13 @@ def test_utf8(self):
17761776
self.assertEqual(writer.finish(),
17771777
"ascii-latin1=\xE9-euro=\u20AC.")
17781778

1779+
def test_ascii(self):
1780+
writer = self.create_writer(0)
1781+
writer.write_ascii(b"Hello ", -1)
1782+
writer.write_ascii(b"", 0)
1783+
writer.write_ascii(b"Python! <truncated>", 6)
1784+
self.assertEqual(writer.finish(), "Hello Python")
1785+
17791786
def test_invalid_utf8(self):
17801787
writer = self.create_writer(0)
17811788
with self.assertRaises(UnicodeDecodeError):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
2+
into a :c:type:`PyUnicodeWriter`. The function is faster than
3+
:c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
4+
input string contains non-ASCII characters. Patch by Victor Stinner.

Modules/_json.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14761476
int rv;
14771477

14781478
if (obj == Py_None) {
1479-
return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
1479+
return PyUnicodeWriter_WriteASCII(writer, "null", 4);
14801480
}
14811481
else if (obj == Py_True) {
1482-
return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
1482+
return PyUnicodeWriter_WriteASCII(writer, "true", 4);
14831483
}
14841484
else if (obj == Py_False) {
1485-
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
1485+
return PyUnicodeWriter_WriteASCII(writer, "false", 5);
14861486
}
14871487
else if (PyUnicode_Check(obj)) {
14881488
PyObject *encoded = encoder_encode_string(s, obj);
@@ -1655,7 +1655,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16551655

16561656
if (PyDict_GET_SIZE(dct) == 0) {
16571657
/* Fast path */
1658-
return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
1658+
return PyUnicodeWriter_WriteASCII(writer, "{}", 2);
16591659
}
16601660

16611661
if (s->markers != Py_None) {
@@ -1756,7 +1756,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17561756
return -1;
17571757
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
17581758
Py_DECREF(s_fast);
1759-
return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
1759+
return PyUnicodeWriter_WriteASCII(writer, "[]", 2);
17601760
}
17611761

17621762
if (s->markers != Py_None) {

Modules/_ssl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state,
563563
goto fail;
564564
}
565565
}
566-
if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) {
566+
if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) {
567567
goto fail;
568568
}
569569
}

Modules/_testcapi/unicode.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args)
332332
}
333333

334334

335+
static PyObject*
336+
writer_write_ascii(PyObject *self_raw, PyObject *args)
337+
{
338+
WriterObject *self = (WriterObject *)self_raw;
339+
if (writer_check(self) < 0) {
340+
return NULL;
341+
}
342+
343+
char *str;
344+
Py_ssize_t size;
345+
if (!PyArg_ParseTuple(args, "yn", &str, &size)) {
346+
return NULL;
347+
}
348+
349+
if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) {
350+
return NULL;
351+
}
352+
Py_RETURN_NONE;
353+
}
354+
355+
335356
static PyObject*
336357
writer_write_widechar(PyObject *self_raw, PyObject *args)
337358
{
@@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args))
513534
static PyMethodDef writer_methods[] = {
514535
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
515536
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
537+
{"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS},
516538
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
517539
{"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
518540
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},

Objects/genericaliasobject.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p)
6565

6666
for (Py_ssize_t i = 0; i < len; i++) {
6767
if (i > 0) {
68-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
68+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
6969
return -1;
7070
}
7171
}
@@ -109,7 +109,7 @@ ga_repr(PyObject *self)
109109
}
110110
for (Py_ssize_t i = 0; i < len; i++) {
111111
if (i > 0) {
112-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
112+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
113113
goto error;
114114
}
115115
}
@@ -126,7 +126,7 @@ ga_repr(PyObject *self)
126126
}
127127
if (len == 0) {
128128
// for something like tuple[()] we should print a "()"
129-
if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) {
129+
if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) {
130130
goto error;
131131
}
132132
}

Objects/typevarobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs)
192192
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) {
193193
PyObject *item = PyTuple_GET_ITEM(value, i);
194194
if (i > 0) {
195-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
195+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
196196
PyUnicodeWriter_Discard(writer);
197197
return NULL;
198198
}
@@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p)
273273
}
274274

275275
if (p == (PyObject *)&_PyNone_Type) {
276-
return PyUnicodeWriter_WriteUTF8(writer, "None", 4);
276+
return PyUnicodeWriter_WriteASCII(writer, "None", 4);
277277
}
278278

279279
if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 &&

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy