Skip to content

Commit d5d9a71

Browse files
methane and vstinner authored
bpo-36346: array: Don't use deprecated APIs (GH-19653)
* Py_UNICODE -> wchar_t
* Py_UNICODE -> unicode in Argument Clinic
* PyUnicode_AsUnicode -> PyUnicode_AsWideCharString
* Don't use "u#" format.

Co-authored-by: Victor Stinner <vstinner@python.org>
1 parent 6067d4b commit d5d9a71

File tree

4 files changed

+63
-55
lines changed

4 files changed

+63
-55
lines changed

Doc/library/array.rst

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ defined:
2222
+-----------+--------------------+-------------------+-----------------------+-------+
2323
| ``'B'`` | unsigned char | int | 1 | |
2424
+-----------+--------------------+-------------------+-----------------------+-------+
25-
| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) |
25+
| ``'u'`` | wchar_t | Unicode character | 2 | \(1) |
2626
+-----------+--------------------+-------------------+-----------------------+-------+
2727
| ``'h'`` | signed short | int | 2 | |
2828
+-----------+--------------------+-------------------+-----------------------+-------+
@@ -48,15 +48,16 @@ defined:
4848
Notes:
4949

5050
(1)
51-
The ``'u'`` type code corresponds to Python's obsolete unicode character
52-
(:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
53-
platform, it can be 16 bits or 32 bits.
51+
It can be 16 bits or 32 bits depending on the platform.
5452

55-
``'u'`` will be removed together with the rest of the :c:type:`Py_UNICODE`
56-
API.
53+
.. versionchanged:: 3.9
54+
``array('u')`` now uses ``wchar_t`` as C type instead of deprecated
55+
``Py_UNICODE``. This change doesn't affect its behavior because
56+
``Py_UNICODE`` has been an alias of ``wchar_t`` since Python 3.3.
5757

5858
.. deprecated-removed:: 3.3 4.0
5959

60+
6061
The actual representation of values is determined by the machine architecture
6162
(strictly speaking, by the C implementation). The actual size can be accessed
6263
through the :attr:`itemsize` attribute.

Doc/whatsnew/3.9.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,12 @@ Changes in the Python API
786786
``PyCF_ALLOW_TOP_LEVEL_AWAIT`` was clashing with ``CO_FUTURE_DIVISION``.
787787
(Contributed by Batuhan Taskaya in :issue:`39562`)
788788

789+
* ``array('u')`` now uses ``wchar_t`` as C type instead of ``Py_UNICODE``.
790+
This change doesn't affect its behavior because ``Py_UNICODE`` has been an alias
791+
of ``wchar_t`` since Python 3.3.
792+
(Contributed by Inada Naoki in :issue:`34538`.)
793+
794+
789795
CPython bytecode changes
790796
------------------------
791797

Modules/arraymodule.c

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -235,24 +235,31 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
235235
static PyObject *
236236
u_getitem(arrayobject *ap, Py_ssize_t i)
237237
{
238-
return PyUnicode_FromOrdinal(((Py_UNICODE *) ap->ob_item)[i]);
238+
return PyUnicode_FromOrdinal(((wchar_t *) ap->ob_item)[i]);
239239
}
240240

241241
static int
242242
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
243243
{
244-
Py_UNICODE *p;
245-
Py_ssize_t len;
246-
247-
if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
244+
PyObject *u;
245+
if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) {
248246
return -1;
249-
if (len != 1) {
247+
}
248+
249+
Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0);
250+
if (len != 2) {
250251
PyErr_SetString(PyExc_TypeError,
251252
"array item must be unicode character");
252253
return -1;
253254
}
254-
if (i >= 0)
255-
((Py_UNICODE *)ap->ob_item)[i] = p[0];
255+
256+
wchar_t w;
257+
len = PyUnicode_AsWideChar(u, &w, 1);
258+
assert(len == 1);
259+
260+
if (i >= 0) {
261+
((wchar_t *)ap->ob_item)[i] = w;
262+
}
256263
return 0;
257264
}
258265

@@ -530,7 +537,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
530537

531538
DEFINE_COMPAREITEMS(b, signed char)
532539
DEFINE_COMPAREITEMS(BB, unsigned char)
533-
DEFINE_COMPAREITEMS(u, Py_UNICODE)
540+
DEFINE_COMPAREITEMS(u, wchar_t)
534541
DEFINE_COMPAREITEMS(h, short)
535542
DEFINE_COMPAREITEMS(HH, unsigned short)
536543
DEFINE_COMPAREITEMS(i, int)
@@ -548,7 +555,7 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long)
548555
static const struct arraydescr descriptors[] = {
549556
{'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1},
550557
{'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0},
551-
{'u', sizeof(Py_UNICODE), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
558+
{'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
552559
{'h', sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1},
553560
{'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0},
554561
{'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1},
@@ -1660,7 +1667,7 @@ array_array_tobytes_impl(arrayobject *self)
16601667
/*[clinic input]
16611668
array.array.fromunicode
16621669
1663-
ustr: Py_UNICODE(zeroes=True)
1670+
ustr: unicode
16641671
/
16651672
16661673
Extends this array with data from the unicode string ustr.
@@ -1671,25 +1678,28 @@ some other type.
16711678
[clinic start generated code]*/
16721679

16731680
static PyObject *
1674-
array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr,
1675-
Py_ssize_clean_t ustr_length)
1676-
/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/
1681+
array_array_fromunicode_impl(arrayobject *self, PyObject *ustr)
1682+
/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/
16771683
{
1678-
char typecode;
1679-
1680-
typecode = self->ob_descr->typecode;
1681-
if (typecode != 'u') {
1684+
if (self->ob_descr->typecode != 'u') {
16821685
PyErr_SetString(PyExc_ValueError,
16831686
"fromunicode() may only be called on "
16841687
"unicode type arrays");
16851688
return NULL;
16861689
}
1687-
if (ustr_length > 0) {
1690+
1691+
Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0);
1692+
assert(ustr_length > 0);
1693+
if (ustr_length > 1) {
1694+
ustr_length--; /* trim trailing NUL character */
16881695
Py_ssize_t old_size = Py_SIZE(self);
1689-
if (array_resize(self, old_size + ustr_length) == -1)
1696+
if (array_resize(self, old_size + ustr_length) == -1) {
16901697
return NULL;
1691-
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
1692-
ustr, ustr_length * sizeof(Py_UNICODE));
1698+
}
1699+
1700+
// must not fail
1701+
PyUnicode_AsWideChar(
1702+
ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length);
16931703
}
16941704

16951705
Py_RETURN_NONE;
@@ -1709,14 +1719,12 @@ static PyObject *
17091719
array_array_tounicode_impl(arrayobject *self)
17101720
/*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/
17111721
{
1712-
char typecode;
1713-
typecode = self->ob_descr->typecode;
1714-
if (typecode != 'u') {
1722+
if (self->ob_descr->typecode != 'u') {
17151723
PyErr_SetString(PyExc_ValueError,
17161724
"tounicode() may only be called on unicode type arrays");
17171725
return NULL;
17181726
}
1719-
return PyUnicode_FromWideChar((Py_UNICODE *) self->ob_item, Py_SIZE(self));
1727+
return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self));
17201728
}
17211729

17221730
/*[clinic input]
@@ -2675,30 +2683,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
26752683
Py_DECREF(v);
26762684
}
26772685
else if (initial != NULL && PyUnicode_Check(initial)) {
2678-
Py_UNICODE *ustr;
26792686
Py_ssize_t n;
2680-
2681-
ustr = PyUnicode_AsUnicode(initial);
2687+
wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n);
26822688
if (ustr == NULL) {
2683-
PyErr_NoMemory();
26842689
Py_DECREF(a);
26852690
return NULL;
26862691
}
26872692

2688-
n = PyUnicode_GET_DATA_SIZE(initial);
26892693
if (n > 0) {
26902694
arrayobject *self = (arrayobject *)a;
2691-
char *item = self->ob_item;
2692-
item = (char *)PyMem_Realloc(item, n);
2693-
if (item == NULL) {
2694-
PyErr_NoMemory();
2695-
Py_DECREF(a);
2696-
return NULL;
2697-
}
2698-
self->ob_item = item;
2699-
Py_SET_SIZE(self, n / sizeof(Py_UNICODE));
2700-
memcpy(item, ustr, n);
2701-
self->allocated = Py_SIZE(self);
2695+
// self->ob_item may be NULL but it is safe.
2696+
PyMem_Free(self->ob_item);
2697+
self->ob_item = (char *)ustr;
2698+
Py_SET_SIZE(self, n);
2699+
self->allocated = n;
27022700
}
27032701
}
27042702
else if (initial != NULL && array_Check(initial) && len > 0) {

Modules/clinic/arraymodule.c.h

Lines changed: 10 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy