From 240e7e5e5104b91951b606be61da1a10613a4b01 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 22 Apr 2020 22:06:30 +0900 Subject: [PATCH 1/9] bpo-36346: array: Don't use deprecated APIs * Py_UNICODE -> wchar_t * Py_UNICODE -> unicode in Argument Clinic * PyUnicode_AsUnicode -> PyUnicode_AsWideCharString --- Modules/arraymodule.c | 65 +++++++++++++++------------------- Modules/clinic/arraymodule.c.h | 17 +++++---- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 4920ad7b82124c..54f2630453d600 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -235,13 +235,13 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) static PyObject * u_getitem(arrayobject *ap, Py_ssize_t i) { - return PyUnicode_FromOrdinal(((Py_UNICODE *) ap->ob_item)[i]); + return PyUnicode_FromOrdinal(((wchar_t *) ap->ob_item)[i]); } static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { - Py_UNICODE *p; + wchar_t *p; Py_ssize_t len; if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) @@ -252,7 +252,7 @@ u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) return -1; } if (i >= 0) - ((Py_UNICODE *)ap->ob_item)[i] = p[0]; + ((wchar_t *)ap->ob_item)[i] = p[0]; return 0; } @@ -530,7 +530,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) DEFINE_COMPAREITEMS(b, signed char) DEFINE_COMPAREITEMS(BB, unsigned char) -DEFINE_COMPAREITEMS(u, Py_UNICODE) +DEFINE_COMPAREITEMS(u, wchar_t) DEFINE_COMPAREITEMS(h, short) DEFINE_COMPAREITEMS(HH, unsigned short) DEFINE_COMPAREITEMS(i, int) @@ -548,7 +548,7 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long) static const struct arraydescr descriptors[] = { {'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1}, {'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0}, - {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, u_compareitems, "u", 0, 0}, + {'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0}, {'h', 
sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1}, {'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0}, {'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1}, @@ -1660,7 +1660,7 @@ array_array_tobytes_impl(arrayobject *self) /*[clinic input] array.array.fromunicode - ustr: Py_UNICODE(zeroes=True) + ustr: unicode / Extends this array with data from the unicode string ustr. @@ -1671,25 +1671,29 @@ some other type. [clinic start generated code]*/ static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length) -/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/ +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) +/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/ { - char typecode; - - typecode = self->ob_descr->typecode; - if (typecode != 'u') { + if (self->ob_descr->typecode != 'u') { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; } - if (ustr_length > 0) { + + Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0); + if (ustr_length > 1) { + ustr_length--; /* trim trailing NUL character */ Py_ssize_t old_size = Py_SIZE(self); - if (array_resize(self, old_size + ustr_length) == -1) + if (array_resize(self, old_size + ustr_length) == -1) { + return NULL; + } + + Py_ssize_t res = PyUnicode_AsWideChar( + ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); + if (res < 0) { // must not happen return NULL; - memcpy(self->ob_item + old_size * sizeof(Py_UNICODE), - ustr, ustr_length * sizeof(Py_UNICODE)); + } } Py_RETURN_NONE; @@ -1709,14 +1713,12 @@ static PyObject * array_array_tounicode_impl(arrayobject *self) /*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/ { - char typecode; - typecode = self->ob_descr->typecode; - if (typecode != 'u') { + if (self->ob_descr->typecode != 'u') { 
PyErr_SetString(PyExc_ValueError, "tounicode() may only be called on unicode type arrays"); return NULL; } - return PyUnicode_FromWideChar((Py_UNICODE *) self->ob_item, Py_SIZE(self)); + return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self)); } /*[clinic input] @@ -2675,30 +2677,21 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(v); } else if (initial != NULL && PyUnicode_Check(initial)) { - Py_UNICODE *ustr; Py_ssize_t n; - - ustr = PyUnicode_AsUnicode(initial); + wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n); if (ustr == NULL) { - PyErr_NoMemory(); Py_DECREF(a); return NULL; } - n = PyUnicode_GET_DATA_SIZE(initial); if (n > 0) { arrayobject *self = (arrayobject *)a; - char *item = self->ob_item; - item = (char *)PyMem_Realloc(item, n); - if (item == NULL) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; + if (self->ob_item != NULL) { + PyMem_Free(self->ob_item); } - self->ob_item = item; - Py_SET_SIZE(self, n / sizeof(Py_UNICODE)); - memcpy(item, ustr, n); - self->allocated = Py_SIZE(self); + self->ob_item = (char *)ustr; + Py_SET_SIZE(self, n); + self->allocated = n; } } else if (initial != NULL && array_Check(initial) && len > 0) { diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h index e1f4b0397b9cb5..b9245ca91d5fa9 100644 --- a/Modules/clinic/arraymodule.c.h +++ b/Modules/clinic/arraymodule.c.h @@ -380,20 +380,23 @@ PyDoc_STRVAR(array_array_fromunicode__doc__, {"fromunicode", (PyCFunction)array_array_fromunicode, METH_O, array_array_fromunicode__doc__}, static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length); +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr); static PyObject * array_array_fromunicode(arrayobject *self, PyObject *arg) { PyObject *return_value = NULL; - const Py_UNICODE *ustr; - Py_ssize_clean_t ustr_length; + PyObject *ustr; - if (!PyArg_Parse(arg, "u#:fromunicode", &ustr, 
&ustr_length)) { + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("fromunicode", "argument", "str", arg); goto exit; } - return_value = array_array_fromunicode_impl(self, ustr, ustr_length); + if (PyUnicode_READY(arg) == -1) { + goto exit; + } + ustr = arg; + return_value = array_array_fromunicode_impl(self, ustr); exit: return return_value; @@ -531,4 +534,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__, #define ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF \ {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__}, -/*[clinic end generated code: output=f649fc0bc9f6b13a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9f70748dd3bc532f input=a9049054013a1b77]*/ From 1d9569ae7acfffe726dcd94f6890c8165d750e9b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 23 Apr 2020 10:01:09 +0900 Subject: [PATCH 2/9] update doc --- Doc/library/array.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Doc/library/array.rst b/Doc/library/array.rst index c9a9b1dabb2a79..823d4aa755de64 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -22,7 +22,7 @@ defined: +-----------+--------------------+-------------------+-----------------------+-------+ | ``'B'`` | unsigned char | int | 1 | | +-----------+--------------------+-------------------+-----------------------+-------+ -| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) | +| ``'u'`` | wchar_t | Unicode character | 2 | \(1) | +-----------+--------------------+-------------------+-----------------------+-------+ | ``'h'`` | signed short | int | 2 | | +-----------+--------------------+-------------------+-----------------------+-------+ @@ -48,13 +48,10 @@ defined: Notes: (1) - The ``'u'`` type code corresponds to Python's obsolete unicode character + The ``'u'`` type code had corresponded to Python's obsolete unicode character (:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). 
Depending on the platform, it can be 16 bits or 32 bits. - ``'u'`` will be removed together with the rest of the :c:type:`Py_UNICODE` - API. - .. deprecated-removed:: 3.3 4.0 The actual representation of values is determined by the machine architecture From c075693889b6338a5f011d1b9c448100fb2ea364 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 27 Apr 2020 17:54:16 +0900 Subject: [PATCH 3/9] Add what's new entry --- Doc/whatsnew/3.9.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 20ebe92865a146..55f8a655e3d538 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -832,6 +832,12 @@ Changes in the Python API inherit from it should have this method defined. (Contributed by Kyle Stanley in :issue:`34037`.) +* ``array('u')`` now uses ``wchar_t`` as C type instead of ``Py_UNICODE``. + This change doesn't affect its behavior because ``Py_UNICODE`` is an alias + of ``wchar_t`` since Python 3.3. Although ``array('u')`` is deprecated, + it may be alive after ``Py_UNICODE`` is removed. + (Contributed by Inada Naoki in :issue:`34538`.) + CPython bytecode changes ------------------------ From 3cf002800dff411a4a4eb8fab33e5759a0b08b31 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 30 Apr 2020 13:16:01 +0900 Subject: [PATCH 4/9] Apply suggestions from code review Co-Authored-By: Victor Stinner --- Doc/whatsnew/3.9.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 55f8a655e3d538..72f54cf21251f7 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -834,8 +834,7 @@ Changes in the Python API * ``array('u')`` now uses ``wchar_t`` as C type instead of ``Py_UNICODE``. This change doesn't affect its behavior because ``Py_UNICODE`` is an alias - of ``wchar_t`` since Python 3.3. Although ``array('u')`` is deprecated, - it may be alive after ``Py_UNICODE`` is removed. + of ``wchar_t`` since Python 3.3.
(Contributed by Inada Naoki in :issue:`34538`.) From fbc39251fff47ff3ebb9f38247f1937a79d3ace6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 30 Apr 2020 14:38:13 +0900 Subject: [PATCH 5/9] Update doc --- Doc/library/array.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Doc/library/array.rst b/Doc/library/array.rst index 823d4aa755de64..78020738bf4f75 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -48,12 +48,16 @@ defined: Notes: (1) - The ``'u'`` type code had corresponded to Python's obsolete unicode character - (:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the - platform, it can be 16 bits or 32 bits. + It can be 16 bits or 32 bits depending on the platform. + + .. versionchanged:: 3.9 + ``array('u')`` now uses ``wchar_t`` as C type instead of deprecated + ``Py_UNICODE``. This change doesn't affect its behavior because + ``Py_UNICODE`` is an alias of ``wchar_t`` since Python 3.3. .. deprecated-removed:: 3.3 4.0 + The actual representation of values is determined by the machine architecture (strictly speaking, by the C implementation). The actual size can be accessed through the :attr:`itemsize` attribute. From bea1779557ae3fb1f72be6762a91c3bca7156f2c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 30 Apr 2020 15:16:43 +0900 Subject: [PATCH 6/9] Don't use deprecated "u#" format.
--- Modules/arraymodule.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 54f2630453d600..959837f60ac6df 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -241,18 +241,25 @@ u_getitem(arrayobject *ap, Py_ssize_t i) static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { - wchar_t *p; - Py_ssize_t len; - - if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) + PyObject *u; + if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) { return -1; - if (len != 1) { + } + + Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0); + if (len != 2) { PyErr_SetString(PyExc_TypeError, "array item must be unicode character"); return -1; } - if (i >= 0) - ((wchar_t *)ap->ob_item)[i] = p[0]; + + wchar_t w; + len = PyUnicode_AsWideChar(u, &w, 1); + assert(len == 1); + + if (i >= 0) { + ((wchar_t *)ap->ob_item)[i] = w; + } return 0; } From df0ea67ea1099c7b81bc577f80d077c4e6c4bd5d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 1 May 2020 15:17:54 +0900 Subject: [PATCH 7/9] more assert --- Modules/arraymodule.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 959837f60ac6df..44d642b55e5689 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1689,6 +1689,7 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) } Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0); + assert(ustr_length > 0); if (ustr_length > 1) { ustr_length--; /* trim trailing NUL character */ Py_ssize_t old_size = Py_SIZE(self); @@ -1698,9 +1699,7 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) Py_ssize_t res = PyUnicode_AsWideChar( ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); - if (res < 0) { // must not happen - return NULL; - } + assert(res == ustr_length); } Py_RETURN_NONE; From 8454e8cdec80ad85a79682c73fa7fad64084fc74 
Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 1 May 2020 15:19:37 +0900 Subject: [PATCH 8/9] remove redundant NULL check --- Modules/arraymodule.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 44d642b55e5689..d77eb1a6a28ebf 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -2692,9 +2692,8 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (n > 0) { arrayobject *self = (arrayobject *)a; - if (self->ob_item != NULL) { - PyMem_Free(self->ob_item); - } + // self->ob_item may be NULL but it is safe. + PyMem_Free(self->ob_item); self->ob_item = (char *)ustr; Py_SET_SIZE(self, n); self->allocated = n; From 4ddac33e97846aa6eb2aafdbce544c99f253ef9f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 2 May 2020 01:53:47 +0900 Subject: [PATCH 9/9] remove redundant assert --- Modules/arraymodule.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index d77eb1a6a28ebf..732703e481adcd 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1697,9 +1697,9 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) return NULL; } - Py_ssize_t res = PyUnicode_AsWideChar( - ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); - assert(res == ustr_length); + // must not fail + PyUnicode_AsWideChar( + ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); } Py_RETURN_NONE; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy