diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 684432da81c61f..ef180464ef1688 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1802,9 +1802,24 @@ object. See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`. +.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size) + + Write the ASCII string *str* into *writer*. + + *size* is the string length in bytes. If *size* is equal to ``-1``, call + ``strlen(str)`` to get the string length. + + *str* must only contain ASCII characters. The behavior is undefined if + *str* contains non-ASCII characters. + + On success, return ``0``. + On error, set an exception, leave the writer unchanged, and return ``-1``. + + .. versionadded:: next + .. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size) - Writer the wide string *str* into *writer*. + Write the wide string *str* into *writer*. *size* is a number of wide characters. If *size* is equal to ``-1``, call ``wcslen(str)`` to get the string length. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index ced9c63071a53c..6d1f653f086a15 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -226,6 +226,13 @@ New features functions as replacements for :c:func:`PySys_GetObject`. (Contributed by Serhiy Storchaka in :gh:`108512`.) +* Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string + into a :c:type:`PyUnicodeWriter`. The function is faster than + :c:func:`PyUnicodeWriter_WriteUTF8`, but has an undefined behavior if the + input string contains non-ASCII characters. + (Contributed by Victor Stinner in :gh:`133968`.) + + Porting to Python 3.15 ---------------------- diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 136f5d5c5f8425..3d0414f5291fe4 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8( PyUnicodeWriter *writer, const char *str, Py_ssize_t size); +PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII( + PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size); PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar( PyUnicodeWriter *writer, const wchar_t *str, diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 3408c10f426058..c8be4f3faa9483 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1776,6 +1776,13 @@ def test_utf8(self): self.assertEqual(writer.finish(), "ascii-latin1=\xE9-euro=\u20AC.") + def test_ascii(self): + writer = self.create_writer(0) + writer.write_ascii(b"Hello ", -1) + writer.write_ascii(b"", 0) + writer.write_ascii(b"Python! ", 6) + self.assertEqual(writer.finish(), "Hello Python") + def test_invalid_utf8(self): writer = self.create_writer(0) with self.assertRaises(UnicodeDecodeError): diff --git a/Lib/test/test_ctypes/test_incomplete.py b/Lib/test/test_ctypes/test_incomplete.py index 2f344611995b2c..3189fcd1bd1330 100644 --- a/Lib/test/test_ctypes/test_incomplete.py +++ b/Lib/test/test_ctypes/test_incomplete.py @@ -1,6 +1,5 @@ import ctypes import unittest -import warnings from ctypes import Structure, POINTER, pointer, c_char_p # String-based "incomplete pointers" were implemented in ctypes 0.6.3 (2003, when diff --git a/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst b/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst new file mode 100644 index 00000000000000..47d5a3bda39942 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2025-05-13-16-06-46.gh-issue-133968.6alWst.rst @@ -0,0 +1,4 @@ +Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string +into a :c:type:`PyUnicodeWriter`. The function is faster than +:c:func:`PyUnicodeWriter_WriteUTF8`, but has an undefined behavior if the +input string contains non-ASCII characters. Patch by Victor Stinner. diff --git a/Modules/_json.c b/Modules/_json.c index 89b0a41dd10acb..4aa6ae650651b3 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return PyUnicodeWriter_WriteUTF8(writer, "null", 4); + return PyUnicodeWriter_WriteASCII(writer, "null", 4); } else if (obj == Py_True) { - return PyUnicodeWriter_WriteUTF8(writer, "true", 4); + return PyUnicodeWriter_WriteASCII(writer, "true", 4); } else if (obj == Py_False) { - return PyUnicodeWriter_WriteUTF8(writer, "false", 5); + return PyUnicodeWriter_WriteASCII(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1649,7 +1649,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (PyDict_GET_SIZE(dct) == 0) { /* Fast path */ - return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + return PyUnicodeWriter_WriteASCII(writer, "{}", 2); } if (s->markers != Py_None) { @@ -1753,7 +1753,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); + return PyUnicodeWriter_WriteASCII(writer, "[]", 2); } if (s->markers != Py_None) { diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 976da1340ecf1e..014e624f6c2f00 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state, goto fail; } } - if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) { goto fail; } } diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index b8ecf53f4f8b9c..e70f5c68bc3b69 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args) } +static PyObject* +writer_write_ascii(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *str; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &str, &size)) { + return NULL; + } + + if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject* writer_write_widechar(PyObject *self_raw, PyObject *args) { @@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) static PyMethodDef writer_methods[] = { {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS}, {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS}, + {"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS}, {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS}, {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS}, {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS}, diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index ec3d01f00a3c3c..07b57f0c552ce9 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -65,7 +65,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p) for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { return -1; } } @@ -109,7 +109,7 @@ ga_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -126,7 +126,7 @@ ga_repr(PyObject *self) } if (len == 0) { // for something like tuple[()] we should print a "()" - if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) { goto error; } } diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 6c199a52aa0ae6..cead6e69af5451 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs) for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) { PyObject *item = PyTuple_GET_ITEM(value, i); if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { PyUnicodeWriter_Discard(writer); return NULL; } @@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p) } if (p == (PyObject *)&_PyNone_Type) { - return PyUnicodeWriter_WriteUTF8(writer, "None", 4); + return PyUnicodeWriter_WriteASCII(writer, "None", 4); } if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 && diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index aa94fb91e65fc3..5611f839627a2e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14083,6 +14083,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, return 0; } + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, diff --git a/Objects/unionobject.c b/Objects/unionobject.c index 66435924b6c6c3..00ca5b9bf80341 100644 --- a/Objects/unionobject.c +++ b/Objects/unionobject.c @@ -290,7 +290,7 @@ union_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { - if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) { + if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) { goto error; } PyObject *p = PyTuple_GET_ITEM(alias->args, i); @@ -300,12 +300,12 @@ union_repr(PyObject *self) } #if 0 - PyUnicodeWriter_WriteUTF8(writer, "|args=", 6); + PyUnicodeWriter_WriteASCII(writer, "|args=", 6); PyUnicodeWriter_WriteRepr(writer, alias->args); - PyUnicodeWriter_WriteUTF8(writer, "|h=", 3); + PyUnicodeWriter_WriteASCII(writer, "|h=", 3); PyUnicodeWriter_WriteRepr(writer, alias->hashable_args); if (alias->unhashable_args) { - PyUnicodeWriter_WriteUTF8(writer, "|u=", 3); + PyUnicodeWriter_WriteASCII(writer, "|u=", 3); PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args); } #endif diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 22dcfe1b0d99bf..dba20226c3283a 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -1512,7 +1512,7 @@ def visitModule(self, mod): for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -1536,7 +1536,7 @@ def visitModule(self, mod): } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -1640,7 +1640,7 @@ def visitModule(self, mod): } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Python/Python-ast.c b/Python/Python-ast.c index f7625ab1205bdc..660bc598a4862c 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -5796,7 +5796,7 @@ ast_repr_list(PyObject *list, int depth) for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -5820,7 +5820,7 @@ ast_repr_list(PyObject *list, int depth) } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -5924,7 +5924,7 @@ ast_repr_max_depth(AST_object *self, int depth) } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Python/context.c b/Python/context.c index dceaae9b42979d..9927cab915cae7 100644 --- a/Python/context.c +++ b/Python/context.c @@ -979,7 +979,7 @@ contextvar_tp_repr(PyObject *op) return NULL; } - if (PyUnicodeWriter_WriteUTF8(writer, "tok_used) { - if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) { goto error; } } - if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { diff --git a/Python/hamt.c b/Python/hamt.c index f9bbf63961d8de..906149cc6cdbdc 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } if (key_or_null == NULL) { - if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) { goto error; } @@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, } } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } @@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node, goto error; } - if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) { goto error; } } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy