From 699e616db735dd85fe054d7251c97e2a56a6d168 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 26 Oct 2018 07:43:36 +0300 Subject: [PATCH 01/17] bpo-36346: Prepare for removing the legacy Unicode C API. Add two compile-time options: HAVE_UNICODE_WCHAR_CACHE and USE_UNICODE_WCHAR_CACHE (both are defined to 1 by default). Setting USE_UNICODE_WCHAR_CACHE to 0 makes the interpreter stop using the wchar_t cache and the legacy Unicode C API. Setting HAVE_UNICODE_WCHAR_CACHE to 0 removes the wchar_t cache and the legacy Unicode C API that depends on it. --- Include/cpython/unicodeobject.h | 15 ++ Lib/test/clinic.test | 19 +- Lib/test/support/__init__.py | 8 + Lib/test/test_csv.py | 2 +- Lib/test/test_decimal.py | 7 +- Lib/test/test_getargs2.py | 4 + Lib/test/test_unicode.py | 2 + Modules/_codecsmodule.c | 22 +- Modules/_ctypes/_ctypes.c | 28 ++- Modules/_ctypes/callproc.c | 10 +- Modules/_ctypes/cfield.c | 26 ++- Modules/_io/fileio.c | 9 + Modules/_testcapimodule.c | 16 ++ Modules/_winapi.c | 92 ++++---- Modules/arraymodule.c | 79 ++++++- Modules/clinic/_winapi.c.h | 77 +++++-- Modules/clinic/arraymodule.c.h | 21 +- Modules/clinic/posixmodule.c.h | 57 ++++- Modules/overlapped.c | 89 ++++++-- Modules/posixmodule.c | 87 +++++-- Objects/unicodeobject.c | 394 ++++++++++++++++++++++---------- PC/_msi.c | 19 +- PC/clinic/winreg.c.h | 304 +++++++++++++++++++++--- PC/winreg.c | 75 ++++-- Python/dynload_win.c | 10 +- Python/fileutils.c | 20 +- Python/getargs.c | 7 + Python/traceback.c | 15 +- Tools/clinic/clinic.py | 59 ++++- 29 files changed, 1237 insertions(+), 336 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index c7634902248534..b01c55e85515a7 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -15,6 +15,9 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; /* --- Internal Unicode Operations ---------------------------------------- */ +#define HAVE_UNICODE_WCHAR_CACHE 1 +#define USE_UNICODE_WCHAR_CACHE 1 + /* Since splitting on whitespace is an 
important use case, and whitespace in most situations is solely ASCII whitespace, we optimize for the common case by using a quick look-up table @@ -71,6 +74,7 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; /* low surrogate = bottom 10 bits added to DC00 */ #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) +#if HAVE_UNICODE_WCHAR_CACHE /* Check if substring matches at given offset. The offset must be valid, and the substring must not be empty. */ @@ -78,6 +82,7 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Unicode Type ------------------------------------------------------- */ @@ -218,7 +223,9 @@ typedef struct { 4 bytes (see issue #19537 on m68k). */ unsigned int :24; } state; +#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr; /* wchar_t representation (null-terminated) */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyASCIIObject; /* Non-ASCII strings allocated through PyUnicode_New use the @@ -229,8 +236,10 @@ typedef struct { Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the * terminating \0. */ char *utf8; /* UTF-8 representation (null-terminated) */ +#if HAVE_UNICODE_WCHAR_CACHE Py_ssize_t wstr_length; /* Number of code points in wstr, possible * surrogates count as two code points. */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyCompactUnicodeObject; /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the @@ -247,6 +256,8 @@ typedef struct { } PyUnicodeObject; /* Fast access macros */ +#if HAVE_UNICODE_WCHAR_CACHE + #define PyUnicode_WSTR_LENGTH(op) \ (PyUnicode_IS_COMPACT_ASCII(op) ? 
\ ((PyASCIIObject*)op)->length : \ @@ -285,6 +296,7 @@ typedef struct { ((const char *)(PyUnicode_AS_UNICODE(op))) /* Py_DEPRECATED(3.3) */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ @@ -1240,6 +1252,9 @@ PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); and where the hash values are equal (i.e. a very probable match) */ PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); +PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); + #ifdef __cplusplus } #endif diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index 62c2f00c6b2b98..5cb89f87b090e6 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -1871,13 +1871,26 @@ test_Py_UNICODE_converter(PyObject *module, PyObject *const *args, Py_ssize_t na const Py_UNICODE *e; Py_ssize_clean_t e_length; - if (!_PyArg_ParseStack(args, nargs, "uuZu#Z#:test_Py_UNICODE_converter", - &a, &b, &c, &d, &d_length, &e, &e_length)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&O&u#Z#:test_Py_UNICODE_converter", + _PyUnicode_WideCharString_Converter, &a, _PyUnicode_WideCharString_Converter, &b, _PyUnicode_WideCharString_Opt_Converter, &c, &d, &d_length, &e, &e_length)) { goto exit; } return_value = test_Py_UNICODE_converter_impl(module, a, b, c, d, d_length, e, e_length); exit: + /* Cleanup for a */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)a); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for b */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)b); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for c */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)c); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1888,7 +1901,7 @@ test_Py_UNICODE_converter_impl(PyObject *module, const Py_UNICODE *a, Py_ssize_clean_t d_length, const Py_UNICODE *e, Py_ssize_clean_t e_length) -/*[clinic end generated code: 
output=dd0a09a1b772e57b input=064a3b68ad7f04b0]*/ +/*[clinic end generated code: output=ef45e982fedf0b3d input=064a3b68ad7f04b0]*/ /*[clinic input] diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 5bd15a2feae9d7..3b395ee7715de3 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -68,6 +68,11 @@ except ImportError: resource = None +try: + from _testcapi import unicode_legacy_string +except ImportError: + unicode_legacy_string = None + __all__ = [ # globals "PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast", @@ -814,6 +819,9 @@ def dec(*args, **kwargs): requires_lzma = unittest.skipUnless(lzma, 'requires lzma') +requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string, + 'requires legacy Unicode C API') + is_jython = sys.platform.startswith('java') is_android = hasattr(sys, 'getandroidapilevel') diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 7a333139b5ea2c..ca39a17aa56732 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -231,9 +231,9 @@ def test_writerows_with_none(self): self.assertEqual(fileobj.read(), 'a\r\n""\r\n') @support.cpython_only + @support.requires_legacy_unicode_capi def test_writerows_legacy_strings(self): import _testcapi - c = _testcapi.unicode_legacy_string('a') with TemporaryFile("w+", newline='') as fileobj: writer = csv.writer(fileobj) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 1f37b5372a3e7e..df6c894aa97335 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -33,9 +33,10 @@ import numbers import locale from test.support import (run_unittest, run_doctest, is_resource_enabled, - requires_IEEE_754, requires_docstrings) + requires_IEEE_754, requires_docstrings, + requires_legacy_unicode_capi) from test.support import (import_fresh_module, TestFailed, - run_with_locale, cpython_only) + run_with_locale, cpython_only, get_attribute) import random import inspect import threading @@ 
-581,6 +582,7 @@ def test_explicit_from_string(self): self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003") @cpython_only + @requires_legacy_unicode_capi def test_from_legacy_strings(self): import _testcapi Decimal = self.decimal.Decimal @@ -2816,6 +2818,7 @@ def test_none_args(self): Overflow]) @cpython_only + @requires_legacy_unicode_capi def test_from_legacy_strings(self): import _testcapi c = self.decimal.Context() diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py index 07e2d151379182..fb451d964ebd0e 100644 --- a/Lib/test/test_getargs2.py +++ b/Lib/test/test_getargs2.py @@ -985,6 +985,7 @@ def test_et_hash(self): buf = bytearray() self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf) + @support.requires_legacy_unicode_capi def test_u(self): from _testcapi import getargs_u self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9') @@ -994,6 +995,7 @@ def test_u(self): self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview')) self.assertRaises(TypeError, getargs_u, None) + @support.requires_legacy_unicode_capi def test_u_hash(self): from _testcapi import getargs_u_hash self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9') @@ -1003,6 +1005,7 @@ def test_u_hash(self): self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview')) self.assertRaises(TypeError, getargs_u_hash, None) + @support.requires_legacy_unicode_capi def test_Z(self): from _testcapi import getargs_Z self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9') @@ -1012,6 +1015,7 @@ def test_Z(self): self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview')) self.assertIsNone(getargs_Z(None)) + @support.requires_legacy_unicode_capi def test_Z_hash(self): from _testcapi import getargs_Z_hash self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9') diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index c277e705b9f55e..7afe84c421793f 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2832,6 +2832,7 @@ def 
test_copycharacters(self): self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0) @support.cpython_only + @support.requires_legacy_unicode_capi def test_encode_decimal(self): from _testcapi import unicode_encodedecimal self.assertEqual(unicode_encodedecimal('123'), @@ -2848,6 +2849,7 @@ def test_encode_decimal(self): unicode_encodedecimal, "123\u20ac", "replace") @support.cpython_only + @support.requires_legacy_unicode_capi def test_transform_decimal(self): from _testcapi import unicode_transformdecimaltoascii as transform_decimal self.assertEqual(transform_decimal('123'), diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index e0d6902b18aa14..64fdf6c588757f 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -704,13 +704,13 @@ _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, return NULL; if (PyUnicode_Check(obj)) { - Py_UNICODE *u; Py_ssize_t len, size; if (PyUnicode_READY(obj) < 0) return NULL; - u = PyUnicode_AsUnicodeAndSize(obj, &len); +#if USE_UNICODE_WCHAR_CACHE + Py_UNICODE *u = PyUnicode_AsUnicodeAndSize(obj, &len); if (u == NULL) return NULL; if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) @@ -718,6 +718,24 @@ _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, size = len * sizeof(Py_UNICODE); return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size), PyUnicode_GET_LENGTH(obj)); +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(obj, NULL, 0); + if (len < 0) { + return NULL; + } + assert(len > 0); + len--; + if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) { + return PyErr_NoMemory(); + } + size = len * sizeof(wchar_t); + PyObject *bytes = PyBytes_FromStringAndSize(NULL, size); + if (bytes == NULL) { + return NULL; + } + PyUnicode_AsWideChar(obj, (wchar_t *)PyBytes_AS_STRING(bytes), len); + return codec_tuple(bytes, PyUnicode_GET_LENGTH(obj)); +#endif /* USE_UNICODE_WCHAR_CACHE */ } else { Py_buffer view; diff --git 
a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 0d95d2b6f76ece..460eda2e7b44fe 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -1293,7 +1293,6 @@ static int WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored)) { Py_ssize_t result = 0; - Py_UNICODE *wstr; Py_ssize_t len; if (value == NULL) { @@ -1309,21 +1308,32 @@ WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored } else Py_INCREF(value); - wstr = PyUnicode_AsUnicodeAndSize(value, &len); - if (wstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + len = PyUnicode_GetSize(value); + if (len < 0) { + Py_DECREF(value); + return -1; + } +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(value, NULL, 0); + if (len < 0) { + Py_DECREF(value); return -1; + } + assert(len > 0); + len--; +#endif /* USE_UNICODE_WCHAR_CACHE */ if ((size_t)len > self->b_size/sizeof(wchar_t)) { PyErr_SetString(PyExc_ValueError, "string too long"); - result = -1; - goto done; + Py_DECREF(value); + return -1; } result = PyUnicode_AsWideChar(value, (wchar_t *)self->b_ptr, self->b_size/sizeof(wchar_t)); if (result >= 0 && (size_t)result < self->b_size/sizeof(wchar_t)) ((wchar_t *)self->b_ptr)[result] = (wchar_t)0; - done: Py_DECREF(value); return result >= 0 ? 
0 : -1; @@ -3358,10 +3368,12 @@ _validate_paramflags(PyTypeObject *type, PyObject *paramflags) for (i = 0; i < len; ++i) { PyObject *item = PyTuple_GET_ITEM(paramflags, i); int flag; - char *name; + PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|ZO", &flag, &name, &defval)) { + if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) || + !(name == Py_None || PyUnicode_Check(name))) + { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); return 0; diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index d91e84613b2fe1..410d4f2026ef06 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1274,7 +1274,6 @@ The handle may be used to locate exported functions in this\n\ module.\n"; static PyObject *load_library(PyObject *self, PyObject *args) { - const WCHAR *name; PyObject *nameobj; PyObject *ignored; HMODULE hMod; @@ -1282,7 +1281,11 @@ static PyObject *load_library(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "U|O:LoadLibrary", &nameobj, &ignored)) return NULL; - name = _PyUnicode_AsUnicode(nameobj); +#if USE_UNICODE_WCHAR_CACHE + const WCHAR *name = _PyUnicode_AsUnicode(nameobj); +#else /* USE_UNICODE_WCHAR_CACHE */ + WCHAR *name = PyUnicode_AsWideCharString(nameobj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!name) return NULL; @@ -1290,6 +1293,9 @@ static PyObject *load_library(PyObject *self, PyObject *args) hMod = LoadLibraryW(name); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(name); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!hMod) return PyErr_SetFromWindowsErr(GetLastError()); #ifdef _WIN64 diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 5f194e21550f7b..1e480c8771a716 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -1229,7 +1229,6 @@ U_get(void *ptr, Py_ssize_t size) static PyObject * U_set(void *ptr, PyObject *value, Py_ssize_t length) 
{ - Py_UNICODE *wstr; Py_ssize_t size; /* It's easier to calculate in characters than in bytes */ @@ -1242,19 +1241,23 @@ U_set(void *ptr, PyObject *value, Py_ssize_t length) return NULL; } - wstr = PyUnicode_AsUnicodeAndSize(value, &size); - if (wstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + size = PyUnicode_GetSize(value); + if (size < 0) return NULL; +#else /* USE_UNICODE_WCHAR_CACHE */ + size = PyUnicode_AsWideChar(value, NULL, 0); + if (size < 0) + return NULL; + size--; +#endif /* USE_UNICODE_WCHAR_CACHE */ if (size > length) { PyErr_Format(PyExc_ValueError, "string too long (%zd, maximum length %zd)", size, length); return NULL; - } else if (size < length-1) - /* copy terminating NUL character if there is space */ - size += 1; - - if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, size) == -1) { + } + if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, length) == -1) { return NULL; } @@ -1423,7 +1426,11 @@ BSTR_set(void *ptr, PyObject *value, Py_ssize_t size) if (value) { wchar_t* wvalue; Py_ssize_t wsize; +#if USE_UNICODE_WCHAR_CACHE wvalue = PyUnicode_AsUnicodeAndSize(value, &wsize); +#else /* USE_UNICODE_WCHAR_CACHE */ + wvalue = PyUnicode_AsWideCharString(value, &wsize); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wvalue == NULL) return NULL; if ((unsigned) wsize != wsize) { @@ -1431,6 +1438,9 @@ BSTR_set(void *ptr, PyObject *value, Py_ssize_t size) return NULL; } bstr = SysAllocStringLen(wvalue, (unsigned)wsize); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wvalue); +#endif /* USE_UNICODE_WCHAR_CACHE */ } else bstr = NULL; diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index c502c430134ef6..487edf436aed36 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -275,7 +275,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, if (!PyUnicode_FSDecoder(nameobj, &stringobj)) { return -1; } +#if USE_UNICODE_WCHAR_CACHE widename = PyUnicode_AsUnicode(stringobj); +#else /* USE_UNICODE_WCHAR_CACHE */ + widename = 
PyUnicode_AsWideCharString(stringobj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (widename == NULL) return -1; #else @@ -492,6 +496,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, internal_close(self); done: +#ifdef MS_WINDOWS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(widename); +#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif Py_CLEAR(stringobj); return ret; } diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 350ef771630eab..ddbc3dc80e1a19 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1410,6 +1410,7 @@ getargs_y_hash(PyObject *self, PyObject *args) return PyBytes_FromStringAndSize(str, size); } +//#if USE_UNICODE_WCHAR_CACHE static PyObject * getargs_u(PyObject *self, PyObject *args) { @@ -1453,6 +1454,7 @@ getargs_Z_hash(PyObject *self, PyObject *args) else Py_RETURN_NONE; } +// #endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * getargs_es(PyObject *self, PyObject *args) @@ -1637,6 +1639,7 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args) static volatile int x; +#if USE_UNICODE_WCHAR_CACHE /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case of an error. 
*/ @@ -1737,6 +1740,7 @@ test_Z_code(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_DECREF(tuple); Py_RETURN_NONE; } +#endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) @@ -1796,6 +1800,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail"); +#if USE_UNICODE_WCHAR_CACHE wide = PyUnicode_FromUnicode(NULL, 1); if (wide == NULL) return NULL; @@ -1809,6 +1814,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_Ready() didn't fail"); } +#endif /* USE_UNICODE_WCHAR_CACHE */ #endif Py_RETURN_NONE; @@ -1946,6 +1952,7 @@ unicode_copycharacters(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", to_copy, copied); } +#if USE_UNICODE_WCHAR_CACHE static PyObject * unicode_encodedecimal(PyObject *self, PyObject *args) { @@ -2013,6 +2020,7 @@ unicode_legacy_string(PyObject *self, PyObject *args) return u; } +#endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * getargs_w_star(PyObject *self, PyObject *args) @@ -4809,10 +4817,12 @@ static PyMethodDef TestMethods[] = { {"getargs_y", getargs_y, METH_VARARGS}, {"getargs_y_star", getargs_y_star, METH_VARARGS}, {"getargs_y_hash", getargs_y_hash, METH_VARARGS}, +// #if USE_UNICODE_WCHAR_CACHE {"getargs_u", getargs_u, METH_VARARGS}, {"getargs_u_hash", getargs_u_hash, METH_VARARGS}, {"getargs_Z", getargs_Z, METH_VARARGS}, {"getargs_Z_hash", getargs_Z_hash, METH_VARARGS}, +// #endif /* USE_UNICODE_WCHAR_CACHE */ {"getargs_w_star", getargs_w_star, METH_VARARGS}, {"getargs_es", getargs_es, METH_VARARGS}, {"getargs_et", getargs_et, METH_VARARGS}, @@ -4823,17 +4833,23 @@ static PyMethodDef TestMethods[] = { {"codec_incrementaldecoder", (PyCFunction)codec_incrementaldecoder, METH_VARARGS}, {"test_s_code", test_s_code, METH_NOARGS}, +#if USE_UNICODE_WCHAR_CACHE {"test_u_code", test_u_code, METH_NOARGS}, 
{"test_Z_code", test_Z_code, METH_NOARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ {"test_widechar", test_widechar, METH_NOARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, +#if USE_UNICODE_WCHAR_CACHE {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ +#if USE_UNICODE_WCHAR_CACHE {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ {"_test_thread_state", test_thread_state, METH_VARARGS}, {"_pending_threadfunc", pending_threadfunc, METH_VARARGS}, #ifdef HAVE_GETTIMEOFDAY diff --git a/Modules/_winapi.c b/Modules/_winapi.c index e7b221d888ef8d..28000c8dc80e06 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -164,10 +164,11 @@ create_converter('LPCVOID', '" F_POINTER "') create_converter('BOOL', 'i') # F_BOOL used previously (always 'i') create_converter('DWORD', 'k') # F_DWORD is always "k" (which is much shorter) create_converter('LPCTSTR', 's') -create_converter('LPCWSTR', 'u') -create_converter('LPWSTR', 'u') create_converter('UINT', 'I') # F_UINT used previously (always 'I') +class LPCWSTR_converter(Py_UNICODE_converter): + type = 'LPCWSTR' + class HANDLE_return_converter(CReturnConverter): type = 'HANDLE' @@ -197,7 +198,7 @@ class LPVOID_return_converter(CReturnConverter): data.return_conversion.append( 'return_value = HANDLE_TO_PYNUM(_return_value);\n') [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=79464c61a31ae932]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=011ee0c3a2244bfe]*/ #include "clinic/_winapi.c.h" @@ -512,15 +513,15 @@ 
_winapi_CreateFileMapping_impl(PyObject *module, HANDLE file_handle, /*[clinic input] _winapi.CreateJunction - src_path: LPWSTR - dst_path: LPWSTR + src_path: LPCWSTR + dst_path: LPCWSTR / [clinic start generated code]*/ static PyObject * -_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path, - LPWSTR dst_path) -/*[clinic end generated code: output=66b7eb746e1dfa25 input=8cd1f9964b6e3d36]*/ +_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path, + LPCWSTR dst_path) +/*[clinic end generated code: output=44b3f5e9bbcc4271 input=963d29b44b9384a7]*/ { /* Privilege adjustment */ HANDLE token = NULL; @@ -752,12 +753,12 @@ gethandle(PyObject* obj, const char* name) return ret; } -static PyObject* +static wchar_t * getenvironment(PyObject* environment) { Py_ssize_t i, envsize, totalsize; - Py_UCS4 *buffer = NULL, *p, *end; - PyObject *keys, *values, *res; + wchar_t *buffer = NULL, *p, *end; + PyObject *keys, *values; /* convert environment dictionary to windows environment string */ if (! PyMapping_Check(environment)) { @@ -775,8 +776,8 @@ getenvironment(PyObject* environment) goto error; } - envsize = PySequence_Fast_GET_SIZE(keys); - if (PySequence_Fast_GET_SIZE(values) != envsize) { + envsize = PyList_GET_SIZE(keys); + if (PyList_GET_SIZE(values) != envsize) { PyErr_SetString(PyExc_RuntimeError, "environment changed size during iteration"); goto error; @@ -784,8 +785,9 @@ getenvironment(PyObject* environment) totalsize = 1; /* trailing null character */ for (i = 0; i < envsize; i++) { - PyObject* key = PySequence_Fast_GET_ITEM(keys, i); - PyObject* value = PySequence_Fast_GET_ITEM(values, i); + PyObject* key = PyList_GET_ITEM(keys, i); + PyObject* value = PyList_GET_ITEM(values, i); + Py_ssize_t size; if (! PyUnicode_Check(key) || ! 
PyUnicode_Check(value)) { PyErr_SetString(PyExc_TypeError, @@ -806,19 +808,25 @@ getenvironment(PyObject* environment) PyErr_SetString(PyExc_ValueError, "illegal environment variable name"); goto error; } - if (totalsize > PY_SSIZE_T_MAX - PyUnicode_GET_LENGTH(key) - 1) { + + size = PyUnicode_AsWideChar(key, NULL, 0); + assert(size > 1); + if (totalsize > PY_SSIZE_T_MAX - size) { PyErr_SetString(PyExc_OverflowError, "environment too long"); goto error; } - totalsize += PyUnicode_GET_LENGTH(key) + 1; /* +1 for '=' */ - if (totalsize > PY_SSIZE_T_MAX - PyUnicode_GET_LENGTH(value) - 1) { + totalsize += size; /* including '=' */ + + size = PyUnicode_AsWideChar(value, NULL, 0); + assert(size > 0); + if (totalsize > PY_SSIZE_T_MAX - size) { PyErr_SetString(PyExc_OverflowError, "environment too long"); goto error; } - totalsize += PyUnicode_GET_LENGTH(value) + 1; /* +1 for '\0' */ + totalsize += size; /* including trailing '\0' */ } - buffer = PyMem_NEW(Py_UCS4, totalsize); + buffer = PyMem_NEW(wchar_t, totalsize); if (! 
buffer) { PyErr_NoMemory(); goto error; @@ -827,31 +835,27 @@ getenvironment(PyObject* environment) end = buffer + totalsize; for (i = 0; i < envsize; i++) { - PyObject* key = PySequence_Fast_GET_ITEM(keys, i); - PyObject* value = PySequence_Fast_GET_ITEM(values, i); - if (!PyUnicode_AsUCS4(key, p, end - p, 0)) - goto error; - p += PyUnicode_GET_LENGTH(key); - *p++ = '='; - if (!PyUnicode_AsUCS4(value, p, end - p, 0)) - goto error; - p += PyUnicode_GET_LENGTH(value); - *p++ = '\0'; + PyObject* key = PyList_GET_ITEM(keys, i); + PyObject* value = PyList_GET_ITEM(values, i); + Py_ssize_t size = PyUnicode_AsWideChar(key, p, end - p); + assert(1 <= size && size < end - p); + p += size; + *p++ = L'='; + size = PyUnicode_AsWideChar(value, p, end - p); + assert(0 <= size && size < end - p); + p += size + 1; } - /* add trailing null byte */ - *p++ = '\0'; + /* add trailing null character */ + *p++ = L'\0'; assert(p == end); Py_XDECREF(keys); Py_XDECREF(values); - res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, p - buffer); - PyMem_Free(buffer); - return res; + return buffer; error: - PyMem_Free(buffer); Py_XDECREF(keys); Py_XDECREF(values); return NULL; @@ -1053,8 +1057,7 @@ _winapi_CreateProcess_impl(PyObject *module, BOOL result; PROCESS_INFORMATION pi; STARTUPINFOEXW si; - PyObject *environment = NULL; - wchar_t *wenvironment; + wchar_t *wenvironment = NULL; wchar_t *command_line_copy = NULL; AttributeList attribute_list = {0}; @@ -1071,20 +1074,11 @@ _winapi_CreateProcess_impl(PyObject *module, goto cleanup; if (env_mapping != Py_None) { - environment = getenvironment(env_mapping); - if (environment == NULL) { - goto cleanup; - } - /* contains embedded null characters */ - wenvironment = PyUnicode_AsUnicode(environment); + wenvironment = getenvironment(env_mapping); if (wenvironment == NULL) { goto cleanup; } } - else { - environment = NULL; - wenvironment = NULL; - } if (getattributelist(startup_info, "lpAttributeList", &attribute_list) < 0) goto cleanup; 
@@ -1131,7 +1125,7 @@ _winapi_CreateProcess_impl(PyObject *module, cleanup: PyMem_Free(command_line_copy); - Py_XDECREF(environment); + PyMem_Free(wenvironment); freeattributelist(&attribute_list); return ret; diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index a5ba27cb36e218..d139dfbaabdaaf 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -243,6 +243,7 @@ u_getitem(arrayobject *ap, Py_ssize_t i) static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { +#if USE_UNICODE_WCHAR_CACHE Py_UNICODE *p; Py_ssize_t len; @@ -256,6 +257,24 @@ u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) if (i >= 0) ((Py_UNICODE *)ap->ob_item)[i] = p[0]; return 0; +#else + PyObject *u; + Py_ssize_t len; + + if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) + return -1; + len = PyUnicode_AsWideChar(u, NULL, 0); + if (len != 2) { + if (len >= 0) { + PyErr_SetString(PyExc_TypeError, + "array item must be unicode character"); + } + return -1; + } + if (i >= 0) + PyUnicode_AsWideChar(u, &((wchar_t *)ap->ob_item)[i], 1); + return 0; +#endif /* USE_UNICODE_WCHAR_CACHE */ } @@ -1701,7 +1720,7 @@ array_array_tostring_impl(arrayobject *self) /*[clinic input] array.array.fromunicode - ustr: Py_UNICODE(zeroes=True) + arg: unicode / Extends this array with data from the unicode string ustr. @@ -1712,25 +1731,38 @@ some other type. 
[clinic start generated code]*/ static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length) -/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/ +array_array_fromunicode_impl(arrayobject *self, PyObject *arg) +/*[clinic end generated code: output=b691324745f39114 input=bc9c8ea0d901b328]*/ { - char typecode; + Py_ssize_t len; - typecode = self->ob_descr->typecode; + char typecode = self->ob_descr->typecode; if (typecode != 'u') { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; } - if (ustr_length > 0) { + +#if USE_UNICODE_WCHAR_CACHE + len = PyUnicode_GetSize(arg); + if (len < 0) { + return NULL; + } +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(arg, NULL, 0); + if (len < 0) { + return NULL; + } + assert(len > 0); + len--; +#endif /* USE_UNICODE_WCHAR_CACHE */ + + if (len > 0) { Py_ssize_t old_size = Py_SIZE(self); - if (array_resize(self, old_size + ustr_length) == -1) + if (array_resize(self, old_size + len) == -1) return NULL; - memcpy(self->ob_item + old_size * sizeof(Py_UNICODE), - ustr, ustr_length * sizeof(Py_UNICODE)); + PyUnicode_AsWideChar(arg, (wchar_t *)self->ob_item + old_size, len); } Py_RETURN_NONE; @@ -2711,8 +2743,9 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(v); } else if (initial != NULL && PyUnicode_Check(initial)) { - Py_UNICODE *ustr; Py_ssize_t n; +#if USE_UNICODE_WCHAR_CACHE + Py_UNICODE *ustr; ustr = PyUnicode_AsUnicode(initial); if (ustr == NULL) { @@ -2736,6 +2769,30 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) memcpy(item, ustr, n); self->allocated = Py_SIZE(self); } +#else /* USE_UNICODE_WCHAR_CACHE */ + n = PyUnicode_AsWideChar(initial, NULL, 0); + if (n < 0) { + PyErr_NoMemory(); + Py_DECREF(a); + return NULL; + } + assert(n > 0); + n--; + if (n > 0) { + arrayobject *self = (arrayobject *)a; + char *item = 
self->ob_item; + item = (char *)PyMem_Realloc(item, n * sizeof(wchar_t)); + if (item == NULL) { + PyErr_NoMemory(); + Py_DECREF(a); + return NULL; + } + self->ob_item = item; + Py_SIZE(self) = n; + PyUnicode_AsWideChar(initial, (wchar_t*)item, n); + self->allocated = Py_SIZE(self); + } +#endif /* USE_UNICODE_WCHAR_CACHE */ } else if (initial != NULL && array_Check(initial) && len > 0) { arrayobject *self = (arrayobject *)a; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index e21f2bc2b6fd6f..96af6530042996 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -195,8 +195,8 @@ _winapi_CreateFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t na LPCWSTR name; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "" F_HANDLE "" F_POINTER "kkku:CreateFileMapping", - &file_handle, &security_attributes, &protect, &max_size_high, &max_size_low, &name)) { + if (!_PyArg_ParseStack(args, nargs, "" F_HANDLE "" F_POINTER "kkkO&:CreateFileMapping", + &file_handle, &security_attributes, &protect, &max_size_high, &max_size_low, _PyUnicode_WideCharString_Converter, &name)) { goto exit; } _return_value = _winapi_CreateFileMapping_impl(module, file_handle, security_attributes, protect, max_size_high, max_size_low, name); @@ -209,6 +209,11 @@ _winapi_CreateFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t na return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -221,23 +226,55 @@ PyDoc_STRVAR(_winapi_CreateJunction__doc__, {"CreateJunction", (PyCFunction)(void(*)(void))_winapi_CreateJunction, METH_FASTCALL, _winapi_CreateJunction__doc__}, static PyObject * -_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path, - LPWSTR dst_path); +_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path, + LPCWSTR dst_path); static PyObject * 
_winapi_CreateJunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - LPWSTR src_path; - LPWSTR dst_path; + LPCWSTR src_path; + LPCWSTR dst_path; - if (!_PyArg_ParseStack(args, nargs, "uu:CreateJunction", - &src_path, &dst_path)) { + if (!_PyArg_CheckPositional("CreateJunction", nargs, 2, 2)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("CreateJunction", 1, "str", args[0]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + src_path = _PyUnicode_AsUnicode(args[0]); + #else /* USE_UNICODE_WCHAR_CACHE */ + src_path = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (src_path == NULL) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("CreateJunction", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + dst_path = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + dst_path = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (dst_path == NULL) { goto exit; } return_value = _winapi_CreateJunction_impl(module, src_path, dst_path); exit: + /* Cleanup for src_path */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)src_path); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for dst_path */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)dst_path); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -367,13 +404,22 @@ _winapi_CreateProcess(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *current_directory; PyObject *startup_info; - if (!_PyArg_ParseStack(args, nargs, "ZOOOikOZO:CreateProcess", - &application_name, &command_line, &proc_attrs, &thread_attrs, &inherit_handles, &creation_flags, &env_mapping, &current_directory, &startup_info)) { + if (!_PyArg_ParseStack(args, nargs, "O&OOOikOO&O:CreateProcess", + _PyUnicode_WideCharString_Opt_Converter, &application_name, &command_line, &proc_attrs, &thread_attrs, 
&inherit_handles, &creation_flags, &env_mapping, _PyUnicode_WideCharString_Opt_Converter, &current_directory, &startup_info)) { goto exit; } return_value = _winapi_CreateProcess_impl(module, application_name, command_line, proc_attrs, thread_attrs, inherit_handles, creation_flags, env_mapping, current_directory, startup_info); exit: + /* Cleanup for application_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)application_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for current_directory */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)current_directory); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -706,8 +752,8 @@ _winapi_OpenFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t narg LPCWSTR name; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "kiu:OpenFileMapping", - &desired_access, &inherit_handle, &name)) { + if (!_PyArg_ParseStack(args, nargs, "kiO&:OpenFileMapping", + &desired_access, &inherit_handle, _PyUnicode_WideCharString_Converter, &name)) { goto exit; } _return_value = _winapi_OpenFileMapping_impl(module, desired_access, inherit_handle, name); @@ -720,6 +766,11 @@ _winapi_OpenFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t narg return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1097,4 +1148,4 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P exit: return return_value; } -/*[clinic end generated code: output=f3897898ea1da99d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b37c7990ebf35ed6 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h index 5f45b7cf673473..e34afa21970d63 100644 --- a/Modules/clinic/arraymodule.c.h +++ b/Modules/clinic/arraymodule.c.h @@ -435,7 +435,7 @@ array_array_tostring(arrayobject *self, 
PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(array_array_fromunicode__doc__, -"fromunicode($self, ustr, /)\n" +"fromunicode($self, arg, /)\n" "--\n" "\n" "Extends this array with data from the unicode string ustr.\n" @@ -448,20 +448,23 @@ PyDoc_STRVAR(array_array_fromunicode__doc__, {"fromunicode", (PyCFunction)array_array_fromunicode, METH_O, array_array_fromunicode__doc__}, static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length); +array_array_fromunicode_impl(arrayobject *self, PyObject *arg); static PyObject * -array_array_fromunicode(arrayobject *self, PyObject *arg) +array_array_fromunicode(arrayobject *self, PyObject *arg_) { PyObject *return_value = NULL; - const Py_UNICODE *ustr; - Py_ssize_clean_t ustr_length; + PyObject *arg; - if (!PyArg_Parse(arg, "u#:fromunicode", &ustr, &ustr_length)) { + if (!PyUnicode_Check(arg_)) { + _PyArg_BadArgument("fromunicode", 0, "str", arg_); goto exit; } - return_value = array_array_fromunicode_impl(self, ustr, ustr_length); + if (PyUnicode_READY(arg_) == -1) { + goto exit; + } + arg = arg_; + return_value = array_array_fromunicode_impl(self, arg); exit: return return_value; @@ -599,4 +602,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__, #define ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF \ {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__}, -/*[clinic end generated code: output=c9a40f11f1a866fb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d955bcda11d2bd74 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 55f2cbb91a083a..5bd415f87e32d9 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1757,12 +1757,25 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k { PyObject *return_value = NULL; static const char * const _keywords[] = {"command", NULL}; - 
static _PyArg_Parser _parser = {"u:system", _keywords, 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "system", 0}; + PyObject *argsbuf[1]; const Py_UNICODE *command; long _return_value; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &command)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("system", 1, "str", args[0]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + command = _PyUnicode_AsUnicode(args[0]); + #else /* USE_UNICODE_WCHAR_CACHE */ + command = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (command == NULL) { goto exit; } _return_value = os_system_impl(module, command); @@ -1772,6 +1785,11 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k return_value = PyLong_FromLong(_return_value); exit: + /* Cleanup for command */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)command); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -6799,19 +6817,44 @@ os_startfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject { PyObject *return_value = NULL; static const char * const _keywords[] = {"filepath", "operation", NULL}; - static _PyArg_Parser _parser = {"O&|u:startfile", _keywords, 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "startfile", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; path_t filepath = PATH_T_INITIALIZE("startfile", "filepath", 0, 0); const Py_UNICODE *operation = NULL; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - path_converter, &filepath, &operation)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &filepath)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("startfile", 2, "str", args[1]); goto exit; } + #if USE_UNICODE_WCHAR_CACHE + operation = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + operation = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (operation == NULL) { + goto exit; + } +skip_optional_pos: return_value = os_startfile_impl(module, &filepath, operation); exit: /* Cleanup for filepath */ path_cleanup(&filepath); + /* Cleanup for operation */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)operation); + #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -8480,4 +8523,4 @@ os_getrandom(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject #ifndef OS_GETRANDOM_METHODDEF #define OS_GETRANDOM_METHODDEF #endif /* !defined(OS_GETRANDOM_METHODDEF) */ -/*[clinic end generated code: output=1a9c62f5841221ae input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bf771259c746b660 input=a9049054013a1b77]*/ diff --git a/Modules/overlapped.c b/Modules/overlapped.c index e5a209bf758297..6b407a4e6566f7 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -356,23 +356,44 @@ overlapped_CreateEvent(PyObject *self, PyObject *args) PyObject *EventAttributes; BOOL ManualReset; BOOL InitialState; + PyObject *Name_obj = NULL; Py_UNICODE *Name; HANDLE Event; - if (!PyArg_ParseTuple(args, "O" F_BOOL F_BOOL "Z", + if (!PyArg_ParseTuple(args, "O" F_BOOL F_BOOL "O", &EventAttributes, 
&ManualReset, - &InitialState, &Name)) + &InitialState, &Name_obj)) return NULL; if (EventAttributes != Py_None) { PyErr_SetString(PyExc_ValueError, "EventAttributes must be None"); return NULL; } + if (Name_obj == Py_None) { + Name = NULL; + } + else if (PyUnicode_Check(Name_obj)) { +#if USE_UNICODE_WCHAR_CACHE + Name = (wchar_t *)_PyUnicode_AsUnicode(Name_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Name = PyUnicode_AsWideCharString(Name_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Name == NULL) { + return NULL; + } + } + else { + _PyArg_BadArgument("CreateEvent", 4, "str or None", Name_obj); + return NULL; + } Py_BEGIN_ALLOW_THREADS Event = CreateEventW(NULL, ManualReset, InitialState, Name); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(Name); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (Event == NULL) return SetFromWindowsErr(0); return Py_BuildValue(F_HANDLE, Event); @@ -1085,6 +1106,7 @@ Overlapped_AcceptEx(OverlappedObject *self, PyObject *args) static int parse_address(PyObject *obj, SOCKADDR *Address, int Length) { + PyObject *Host_obj; Py_UNICODE *Host; unsigned short Port; unsigned long FlowInfo; @@ -1092,33 +1114,66 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) memset(Address, 0, Length); - if (PyArg_ParseTuple(obj, "uH", &Host, &Port)) - { + switch (PyTuple_GET_SIZE(obj)) { + case 2: { + if (!PyArg_ParseTuple(obj, "UH", &Host_obj, &Port)) { + return -1; + } +#if USE_UNICODE_WCHAR_CACHE + Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Host = PyUnicode_AsWideCharString(Host_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Host == NULL) { + return -1; + } Address->sa_family = AF_INET; if (WSAStringToAddressW(Host, AF_INET, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); - return -1; + Length = -1; } - ((SOCKADDR_IN*)Address)->sin_port = htons(Port); + else { + ((SOCKADDR_IN*)Address)->sin_port = htons(Port); + } +#if !USE_UNICODE_WCHAR_CACHE + 
PyMem_Free(Host); +#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } - else if (PyArg_ParseTuple(obj, - "uHkk;ConnectEx(): illegal address_as_bytes " - "argument", &Host, &Port, &FlowInfo, &ScopeId)) - { - PyErr_Clear(); + case 4: { + if (!PyArg_ParseTuple(obj, + "UHkk;ConnectEx(): illegal address_as_bytes argument", + &Host_obj, &Port, &FlowInfo, &ScopeId)) + { + return -1; + } +#if USE_UNICODE_WCHAR_CACHE + Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Host = PyUnicode_AsWideCharString(Host_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Host == NULL) { + return -1; + } Address->sa_family = AF_INET6; if (WSAStringToAddressW(Host, AF_INET6, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); - return -1; + Length = -1; + } + else { + ((SOCKADDR_IN6*)Address)->sin6_port = htons(Port); + ((SOCKADDR_IN6*)Address)->sin6_flowinfo = FlowInfo; + ((SOCKADDR_IN6*)Address)->sin6_scope_id = ScopeId; } - ((SOCKADDR_IN6*)Address)->sin6_port = htons(Port); - ((SOCKADDR_IN6*)Address)->sin6_flowinfo = FlowInfo; - ((SOCKADDR_IN6*)Address)->sin6_scope_id = ScopeId; +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(Host); +#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } - - return -1; + default: + PyErr_SetString(PyExc_ValueError, "illegal address_as_bytes argument"); + return -1; + } } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 3f760183575aac..f225d70f91cbaa 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -900,6 +900,10 @@ typedef struct { static void path_cleanup(path_t *path) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((wchar_t *)path->wide); + path->wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ Py_CLEAR(path->object); Py_CLEAR(path->cleanup); } @@ -914,7 +918,7 @@ path_converter(PyObject *o, void *p) const char *narrow; #ifdef MS_WINDOWS PyObject *wo = NULL; - const wchar_t *wide; + wchar_t *wide = NULL; #endif #define FORMAT_EXCEPTION(exc, fmt) \ @@ -988,7 
+992,11 @@ path_converter(PyObject *o, void *p) if (is_unicode) { #ifdef MS_WINDOWS +#if USE_UNICODE_WCHAR_CACHE wide = PyUnicode_AsUnicodeAndSize(o, &length); +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = PyUnicode_AsWideCharString(o, &length); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1004,6 +1012,9 @@ path_converter(PyObject *o, void *p) path->wide = wide; path->narrow = FALSE; path->fd = -1; +#if !USE_UNICODE_WCHAR_CACHE + wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ goto success_exit; #else if (!PyUnicode_FSConverter(o, &bytes)) { @@ -1079,7 +1090,12 @@ path_converter(PyObject *o, void *p) goto error_exit; } +#if USE_UNICODE_WCHAR_CACHE wide = PyUnicode_AsUnicodeAndSize(wo, &length); +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = PyUnicode_AsWideCharString(wo, &length); + Py_DECREF(wo); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1093,8 +1109,12 @@ path_converter(PyObject *o, void *p) } path->wide = wide; path->narrow = TRUE; - path->cleanup = wo; Py_DECREF(bytes); +#if USE_UNICODE_WCHAR_CACHE + path->cleanup = wo; +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ #else path->wide = NULL; path->narrow = narrow; @@ -1118,7 +1138,11 @@ path_converter(PyObject *o, void *p) Py_XDECREF(o); Py_XDECREF(bytes); #ifdef MS_WINDOWS +#if USE_UNICODE_WCHAR_CACHE Py_XDECREF(wo); +#else /* USE_UNICODE_WCHAR_CACHE */ + PyMem_Free(wide); +#endif /* USE_UNICODE_WCHAR_CACHE */ #endif return 0; } @@ -9722,7 +9746,6 @@ static PyObject * os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) /*[clinic end generated code: output=d29a567d6b2327d2 input=ba586581c2e6105f]*/ { - const wchar_t *env; Py_ssize_t size; /* Search from index 1 because on Windows starting '=' is allowed for @@ -9733,14 +9756,33 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) PyErr_SetString(PyExc_ValueError, "illegal environment variable name"); return NULL; } - PyObject 
*unicode = PyUnicode_FromFormat("%U=%U", name, value); - if (unicode == NULL) { + PyObject *buffer = PyUnicode_FromFormat("%U=%U", name, value); + if (buffer == NULL) { return NULL; } - env = PyUnicode_AsUnicodeAndSize(unicode, &size); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *env = PyUnicode_AsUnicodeAndSize(buffer, &size); if (env == NULL) goto error; +#else /* USE_UNICODE_WCHAR_CACHE */ + size = PyUnicode_AsWideChar(buffer, NULL, 0); + if (size < 0) + goto error; /* error: releases buffer */ + if ((size_t)size > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) { + PyErr_NoMemory(); + goto error; /* error: releases buffer */ + } + PyObject *bytes = PyBytes_FromStringAndSize(NULL, size * sizeof(wchar_t)); + if (bytes == NULL) { + goto error; + } + wchar_t *env = (wchar_t *)PyBytes_AS_STRING(bytes); + size = PyUnicode_AsWideChar(buffer, env, size); + assert(size >= 0); + Py_DECREF(buffer); + buffer = bytes; +#endif /* USE_UNICODE_WCHAR_CACHE */ if (size > _MAX_ENV) { PyErr_Format(PyExc_ValueError, "the environment variable is longer than %u characters", @@ -9757,11 +9799,11 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) goto error; } - posix_putenv_garbage_setitem(name, unicode); + posix_putenv_garbage_setitem(name, buffer); Py_RETURN_NONE; error: - Py_DECREF(unicode); + Py_DECREF(buffer); return NULL; } #else /* MS_WINDOWS */ @@ -12178,7 +12220,12 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) #ifdef MS_WINDOWS if (!PyUnicode_FSDecoder(self->path, &ub)) return NULL; +#if USE_UNICODE_WCHAR_CACHE const wchar_t *path = PyUnicode_AsUnicode(ub); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *path = PyUnicode_AsWideCharString(ub, NULL); + Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ #else /* POSIX */ if (!PyUnicode_FSConverter(self->path, &ub)) return NULL; @@ -12188,6 +12235,11 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) result = fstatat(self->dir_fd, path, &st, follow_symlinks ? 
0 : AT_SYMLINK_NOFOLLOW); #else +#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE + PyMem_Free(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat"); return NULL; #endif /* HAVE_FSTATAT */ @@ -12200,7 +12252,11 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) else result = LSTAT(path, &st); } +#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE + PyMem_Free(path); +#else /* USE_UNICODE_WCHAR_CACHE */ Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (result != 0) return path_object_error(self->path); @@ -12373,15 +12429,21 @@ os_DirEntry_inode_impl(DirEntry *self) #ifdef MS_WINDOWS if (!self->got_file_index) { PyObject *unicode; - const wchar_t *path; STRUCT_STAT stat; int result; if (!PyUnicode_FSDecoder(self->path, &unicode)) return NULL; - path = PyUnicode_AsUnicode(unicode); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *path = PyUnicode_AsUnicode(unicode); result = LSTAT(path, &stat); Py_DECREF(unicode); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *path = PyUnicode_AsWideCharString(unicode, NULL); + Py_DECREF(unicode); + result = LSTAT(path, &stat); + PyMem_Free(path); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (result != 0) return path_object_error(self->path); @@ -12966,10 +13028,9 @@ os_scandir_impl(PyObject *module, path_t *path) iterator->dirp = NULL; #endif - memcpy(&iterator->path, path, sizeof(path_t)); /* Move the ownership to iterator->path */ - path->object = NULL; - path->cleanup = NULL; + memcpy(&iterator->path, path, sizeof(path_t)); + memset(path, 0, sizeof(path_t)); #ifdef MS_WINDOWS iterator->first_time = 1; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8141ce757412cb..8f2f62fcbec8b4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -108,10 +108,12 @@ extern "C" { PyUnicode_IS_COMPACT_ASCII(op) ? 
\ ((PyASCIIObject*)(op))->length : \ _PyUnicode_UTF8_LENGTH(op)) +#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_WSTR(op) \ (((PyASCIIObject*)(op))->wstr) #define _PyUnicode_WSTR_LENGTH(op) \ (((PyCompactUnicodeObject*)(op))->wstr_length) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ #define _PyUnicode_LENGTH(op) \ (((PyASCIIObject *)(op))->length) #define _PyUnicode_STATE(op) \ @@ -138,9 +140,11 @@ extern "C" { (assert(_PyUnicode_CHECK(op)), \ assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) +#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_SHARE_WSTR(op) \ (assert(_PyUnicode_CHECK(op)), \ (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op))) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ @@ -149,12 +153,14 @@ extern "C" { && _PyUnicode_UTF8(op) \ && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) +#if HAVE_UNICODE_WCHAR_CACHE /* true if the Unicode object has an allocated wstr memory block (not shared with other data) */ #define _PyUnicode_HAS_WSTR_MEMORY(op) \ ((_PyUnicode_WSTR(op) && \ (!PyUnicode_IS_READY(op) || \ _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Generic helper macro to convert characters of different types. 
from_type and to_type have to be valid type names, begin and end @@ -431,6 +437,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) PyUnicodeObject *unicode = (PyUnicodeObject *)op; data = unicode->data.any; +#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { ASSERT(ascii->length == 0); ASSERT(ascii->hash == -1); @@ -442,7 +449,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) ASSERT(data == NULL); ASSERT(compact->utf8 == NULL); } - else { + else +#endif /* HAVE_UNICODE_WCHAR_CACHE */ + { ASSERT(kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND); @@ -457,6 +466,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) ASSERT (compact->utf8 != data); } } +#if HAVE_UNICODE_WCHAR_CACHE if (kind != PyUnicode_WCHAR_KIND) { if ( #if SIZEOF_WCHAR_T == 2 @@ -471,11 +481,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) } else ASSERT(ascii->wstr != data); } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (compact->utf8 == NULL) ASSERT(compact->utf8_length == 0); +#if HAVE_UNICODE_WCHAR_CACHE if (ascii->wstr == NULL) ASSERT(compact->wstr_length == 0); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /* check that the best kind is used */ if (check_content && kind != PyUnicode_WCHAR_KIND) @@ -857,11 +870,13 @@ ensure_unicode(PyObject *obj) #include "stringlib/find_max_char.h" #include "stringlib/undef.h" +#if HAVE_UNICODE_WCHAR_CACHE #include "stringlib/unicodedefs.h" #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/undef.h" +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Unicode Object ----------------------------------------------------- */ @@ -920,7 +935,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) Py_ssize_t char_size; Py_ssize_t struct_size; Py_ssize_t new_size; - int share_wstr; PyObject *new_unicode; #ifdef Py_DEBUG Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); @@ -935,7 +949,9 @@ 
resize_compact(PyObject *unicode, Py_ssize_t length) struct_size = sizeof(PyASCIIObject); else struct_size = sizeof(PyCompactUnicodeObject); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#if HAVE_UNICODE_WCHAR_CACHE + int share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { PyErr_NoMemory(); @@ -961,6 +977,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _Py_NewReference(unicode); _PyUnicode_LENGTH(unicode) = length; +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); if (!PyUnicode_IS_ASCII(unicode)) @@ -972,6 +989,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = 0; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif @@ -984,14 +1002,13 @@ resize_compact(PyObject *unicode, Py_ssize_t length) static int resize_inplace(PyObject *unicode, Py_ssize_t length) { - wchar_t *wstr; Py_ssize_t new_size; assert(!PyUnicode_IS_COMPACT(unicode)); assert(Py_REFCNT(unicode) == 1); if (PyUnicode_IS_READY(unicode)) { Py_ssize_t char_size; - int share_wstr, share_utf8; + int share_utf8; void *data; #ifdef Py_DEBUG Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); @@ -999,7 +1016,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) data = _PyUnicode_DATA_ANY(unicode); char_size = PyUnicode_KIND(unicode); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#if HAVE_UNICODE_WCHAR_CACHE + int share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ share_utf8 = _PyUnicode_SHARE_UTF8(unicode); if (length > (PY_SSIZE_T_MAX / char_size - 1)) { @@ -1021,10 +1040,12 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } _PyUnicode_DATA_ANY(unicode) = data; +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR(unicode) = data; _PyUnicode_WSTR_LENGTH(unicode) 
= length; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (share_utf8) { _PyUnicode_UTF8(unicode) = data; _PyUnicode_UTF8_LENGTH(unicode) = length; @@ -1034,11 +1055,16 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { +#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; +#if HAVE_UNICODE_WCHAR_CACHE } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } +#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); /* check for integer overflow */ @@ -1047,7 +1073,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } new_size = sizeof(wchar_t) * (length + 1); - wstr = _PyUnicode_WSTR(unicode); + wchar_t *wstr = _PyUnicode_WSTR(unicode); wstr = PyObject_REALLOC(wstr, new_size); if (!wstr) { PyErr_NoMemory(); @@ -1056,6 +1082,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) _PyUnicode_WSTR(unicode) = wstr; _PyUnicode_WSTR(unicode)[length] = 0; _PyUnicode_WSTR_LENGTH(unicode) = length; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; } @@ -1064,20 +1091,8 @@ static PyObject* resize_copy(PyObject *unicode, Py_ssize_t length) { Py_ssize_t copy_length; - if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) { - PyObject *copy; - - assert(PyUnicode_IS_READY(unicode)); - - copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); - if (copy == NULL) - return NULL; - - copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); - return copy; - } - else { +#if HAVE_UNICODE_WCHAR_CACHE + if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) { PyObject *w; w = (PyObject*)_PyUnicode_New(length); @@ -1089,6 +1104,18 @@ resize_copy(PyObject *unicode, Py_ssize_t length) copy_length * sizeof(wchar_t)); return w; } +#endif /* 
HAVE_UNICODE_WCHAR_CACHE */ + PyObject *copy; + + assert(PyUnicode_IS_READY(unicode)); + + copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); + if (copy == NULL) + return NULL; + + copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); + return copy; } /* We allocate one more byte to make sure the string is @@ -1103,15 +1130,13 @@ resize_copy(PyObject *unicode, Py_ssize_t length) static PyUnicodeObject * _PyUnicode_New(Py_ssize_t length) { - PyUnicodeObject *unicode; - size_t new_size; - /* Optimization for empty strings */ - if (length == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return (PyUnicodeObject*)unicode_empty; + if (length == 0) { + _Py_INCREF_UNICODE_EMPTY(); + return (PyUnicodeObject *)unicode_empty; } +#if HAVE_UNICODE_WCHAR_CACHE /* Ensure we won't overflow the size. */ if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { return (PyUnicodeObject *)PyErr_NoMemory(); @@ -1122,10 +1147,10 @@ _PyUnicode_New(Py_ssize_t length) return NULL; } - unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); + PyUnicodeObject *unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); if (unicode == NULL) return NULL; - new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); _PyUnicode_WSTR_LENGTH(unicode) = length; _PyUnicode_HASH(unicode) = -1; @@ -1158,6 +1183,11 @@ _PyUnicode_New(Py_ssize_t length) assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); return unicode; +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_SetString(PyExc_SystemError, + "_PyUnicode_New() with non-zero size is not supported"); + return NULL; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } static const char* @@ -1239,15 +1269,21 @@ _PyUnicode_Dump(PyObject *op) } else data = unicode->data.any; - printf("%s: len=%" PY_FORMAT_SIZE_T "u, ", + printf("%s: len=%" PY_FORMAT_SIZE_T "u", unicode_kind_name(op), 
ascii->length); +#if HAVE_UNICODE_WCHAR_CACHE + printf(", "); if (ascii->wstr == data) printf("shared "); printf("wstr=%p", ascii->wstr); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) { - printf(" (%" PY_FORMAT_SIZE_T "u), ", compact->wstr_length); +#if HAVE_UNICODE_WCHAR_CACHE + printf(" (%" PY_FORMAT_SIZE_T "u)", compact->wstr_length); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ + printf(", "); if (!ascii->state.compact && compact->utf8 == unicode->data.any) printf("shared "); printf("utf8=%p (%" PY_FORMAT_SIZE_T "u)", @@ -1264,7 +1300,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) PyCompactUnicodeObject *unicode; void *data; enum PyUnicode_Kind kind; - int is_sharing, is_ascii; + int is_ascii; Py_ssize_t char_size; Py_ssize_t struct_size; @@ -1275,23 +1311,17 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) } is_ascii = 0; - is_sharing = 0; struct_size = sizeof(PyCompactUnicodeObject); if (maxchar < 128) { kind = PyUnicode_1BYTE_KIND; - char_size = 1; is_ascii = 1; struct_size = sizeof(PyASCIIObject); } else if (maxchar < 256) { kind = PyUnicode_1BYTE_KIND; - char_size = 1; } else if (maxchar < 65536) { kind = PyUnicode_2BYTE_KIND; - char_size = 2; - if (sizeof(wchar_t) == 2) - is_sharing = 1; } else { if (maxchar > MAX_UNICODE) { @@ -1300,10 +1330,11 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) return NULL; } kind = PyUnicode_4BYTE_KIND; - char_size = 4; - if (sizeof(wchar_t) == 4) - is_sharing = 1; } + char_size = kind; +#if HAVE_UNICODE_WCHAR_CACHE + int is_sharing = (sizeof(wchar_t) == kind); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the size. 
*/ if (size < 0) { @@ -1339,29 +1370,35 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) _PyUnicode_STATE(unicode).ascii = is_ascii; if (is_ascii) { ((char*)data)[size] = 0; +#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR(unicode) = NULL; - } - else if (kind == PyUnicode_1BYTE_KIND) { - ((char*)data)[size] = 0; - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; - unicode->utf8 = NULL; - unicode->utf8_length = 0; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } else { unicode->utf8 = NULL; unicode->utf8_length = 0; - if (kind == PyUnicode_2BYTE_KIND) - ((Py_UCS2*)data)[size] = 0; - else /* kind == PyUnicode_4BYTE_KIND */ - ((Py_UCS4*)data)[size] = 0; - if (is_sharing) { - _PyUnicode_WSTR_LENGTH(unicode) = size; - _PyUnicode_WSTR(unicode) = (wchar_t *)data; + if (kind == PyUnicode_1BYTE_KIND) { + ((char*)data)[size] = 0; +#if HAVE_UNICODE_WCHAR_CACHE + _PyUnicode_WSTR(unicode) = NULL; + _PyUnicode_WSTR_LENGTH(unicode) = 0; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } else { - _PyUnicode_WSTR_LENGTH(unicode) = 0; - _PyUnicode_WSTR(unicode) = NULL; + if (kind == PyUnicode_2BYTE_KIND) + ((Py_UCS2*)data)[size] = 0; + else /* kind == PyUnicode_4BYTE_KIND */ + ((Py_UCS4*)data)[size] = 0; +#if HAVE_UNICODE_WCHAR_CACHE + if (is_sharing) { + _PyUnicode_WSTR_LENGTH(unicode) = size; + _PyUnicode_WSTR(unicode) = (wchar_t *)data; + } + else { + _PyUnicode_WSTR_LENGTH(unicode) = 0; + _PyUnicode_WSTR(unicode) = NULL; + } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } } #ifdef Py_DEBUG @@ -1675,12 +1712,14 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, int _PyUnicode_Ready(PyObject *unicode) { +#if HAVE_UNICODE_WCHAR_CACHE wchar_t *end; Py_UCS4 maxchar = 0; Py_ssize_t num_surrogates; #if SIZEOF_WCHAR_T == 2 Py_ssize_t length_wo_surrogates; #endif +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* _PyUnicode_Ready() is only intended for old-style API usage where strings were created using _PyObject_New() and where no canonical @@ -1688,12 +1727,15 @@ 
_PyUnicode_Ready(PyObject *unicode) which are not yet ready. */ assert(_PyUnicode_CHECK(unicode)); assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND); +#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_DATA_ANY(unicode) == NULL); assert(_PyUnicode_UTF8(unicode) == NULL); /* Actually, it should neither be interned nor be anything else: */ assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED); +#if HAVE_UNICODE_WCHAR_CACHE end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, &maxchar, &num_surrogates) == -1) @@ -1796,6 +1838,7 @@ _PyUnicode_Ready(PyObject *unicode) #endif PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0'; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_STATE(unicode).ready = 1; assert(_PyUnicode_CheckConsistency(unicode, 1)); return 0; @@ -1824,8 +1867,10 @@ unicode_dealloc(PyObject *unicode) Py_FatalError("Inconsistent interned string state."); } +#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) PyObject_DEL(_PyUnicode_WSTR(unicode)); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) PyObject_DEL(_PyUnicode_UTF8(unicode)); if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) @@ -1883,9 +1928,11 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) assert(PyUnicode_Check(unicode)); assert(0 <= length); +#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) old_length = PyUnicode_WSTR_LENGTH(unicode); else +#endif /* HAVE_UNICODE_WCHAR_CACHE */ old_length = PyUnicode_GET_LENGTH(unicode); if (old_length == length) return 0; @@ -2975,9 +3022,11 @@ unicode_get_widechar_size(PyObject *unicode) assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); +#if USE_UNICODE_WCHAR_CACHE if (_PyUnicode_WSTR(unicode) != NULL) { return PyUnicode_WSTR_LENGTH(unicode); } +#endif /* 
USE_UNICODE_WCHAR_CACHE */ assert(PyUnicode_IS_READY(unicode)); res = _PyUnicode_LENGTH(unicode); @@ -2998,16 +3047,21 @@ unicode_get_widechar_size(PyObject *unicode) static void unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size) { - const wchar_t *wstr; - assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); - wstr = _PyUnicode_WSTR(unicode); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wstr = _PyUnicode_WSTR(unicode); if (wstr != NULL) { memcpy(w, wstr, size * sizeof(wchar_t)); return; } +#else /* USE_UNICODE_WCHAR_CACHE */ + if (PyUnicode_KIND(unicode) == sizeof(wchar_t)) { + memcpy(w, PyUnicode_DATA(unicode), size * sizeof(wchar_t)); + return; + } +#endif /* USE_UNICODE_WCHAR_CACHE */ assert(PyUnicode_IS_READY(unicode)); if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) { @@ -3122,6 +3176,74 @@ PyUnicode_AsWideCharString(PyObject *unicode, #endif /* HAVE_WCHAR_H */ +int +_PyUnicode_WideCharString_Converter(PyObject *obj, void *ptr) +{ + wchar_t **p = (wchar_t **)ptr; + if (obj == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(*p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + *p = NULL; + return 1; + } + if (PyUnicode_Check(obj)) { +#if USE_UNICODE_WCHAR_CACHE + *p = (wchar_t *)_PyUnicode_AsUnicode(obj); + if (*p == NULL) { + return 0; + } + return 1; +#else /* USE_UNICODE_WCHAR_CACHE */ + *p = PyUnicode_AsWideCharString(obj, NULL); + if (*p == NULL) { + return 0; + } + return Py_CLEANUP_SUPPORTED; +#endif /* USE_UNICODE_WCHAR_CACHE */ + } + PyErr_Format(PyExc_TypeError, + "argument must be str, not %.50s", + obj->ob_type->tp_name); + return 0; +} + +int +_PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr) +{ + wchar_t **p = (wchar_t **)ptr; + if (obj == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(*p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + *p = NULL; + return 1; + } + if (obj == Py_None) { + *p = NULL; + return 1; + } + if (PyUnicode_Check(obj)) { +#if USE_UNICODE_WCHAR_CACHE + *p = (wchar_t 
*)_PyUnicode_AsUnicode(obj); + if (*p == NULL) { + return 0; + } + return 1; +#else /* USE_UNICODE_WCHAR_CACHE */ + *p = PyUnicode_AsWideCharString(obj, NULL); + if (*p == NULL) { + return 0; + } + return Py_CLEANUP_SUPPORTED; +#endif /* USE_UNICODE_WCHAR_CACHE */ + } + PyErr_Format(PyExc_TypeError, + "argument must be str or None, not %.50s", + obj->ob_type->tp_name); + return 0; +} + PyObject * PyUnicode_FromOrdinal(int ordinal) { @@ -3906,6 +4028,7 @@ PyUnicode_AsUTF8(PyObject *unicode) Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) { +#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); return NULL; @@ -3935,6 +4058,10 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) if (size != NULL) *size = PyUnicode_WSTR_LENGTH(unicode); return w; +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_SetString(PyExc_SystemError, "PyUnicode_AsUnicodeAndSize is not supported"); + return NULL; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } Py_UNICODE * @@ -3961,6 +4088,7 @@ _PyUnicode_AsUnicode(PyObject *unicode) Py_ssize_t PyUnicode_GetSize(PyObject *unicode) { +#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); goto onError; @@ -3973,6 +4101,10 @@ PyUnicode_GetSize(PyObject *unicode) onError: return -1; +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_SetString(PyExc_SystemError, "PyUnicode_GetSize is not supported"); + return -1; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } Py_ssize_t @@ -4104,7 +4236,6 @@ unicode_decode_call_errorhandler_wchar( Py_ssize_t requiredsize; Py_ssize_t newpos; PyObject *inputobj = NULL; - wchar_t *repwstr; Py_ssize_t repwlen; if (*errorHandler == NULL) { @@ -4150,9 +4281,16 @@ unicode_decode_call_errorhandler_wchar( goto onError; } - repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); - if (repwstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + repwlen = PyUnicode_GetSize(repunicode); + if (repwlen < 0) goto onError; +#else /* USE_UNICODE_WCHAR_CACHE */ + 
repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0); + if (repwlen < 0) + goto onError; + repwlen--; +#endif /* USE_UNICODE_WCHAR_CACHE */ /* need more space? (at least enough for what we have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space @@ -4172,7 +4310,7 @@ unicode_decode_call_errorhandler_wchar( goto onError; } } - wcsncpy(*buf + *outpos, repwstr, repwlen); + PyUnicode_AsWideChar(repunicode, *buf + *outpos, repwlen); *outpos += repwlen; *endinpos = newpos; *inptr = *input + newpos; @@ -7461,6 +7599,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, /* Create a substring so that we can get the UTF-16 representation of just the slice under consideration. */ PyObject *substring; + int ret = -1; assert(len > 0); @@ -7472,11 +7611,19 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, substring = PyUnicode_Substring(unicode, offset, offset+len); if (substring == NULL) return -1; +#if USE_UNICODE_WCHAR_CACHE p = PyUnicode_AsUnicodeAndSize(substring, &size); if (p == NULL) { Py_DECREF(substring); return -1; } +#else /* USE_UNICODE_WCHAR_CACHE */ + p = PyUnicode_AsWideCharString(substring, &size); + Py_CLEAR(substring); + if (p == NULL) { + return -1; + } +#endif /* USE_UNICODE_WCHAR_CACHE */ assert(size <= INT_MAX); /* First get the size of the result */ @@ -7488,16 +7635,15 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, goto error; /* If we used a default char, then we failed! 
*/ if (pusedDefaultChar && *pusedDefaultChar) { - Py_DECREF(substring); - return -2; + ret = -2; + goto done; } if (*outbytes == NULL) { /* Create string object */ *outbytes = PyBytes_FromStringAndSize(NULL, outsize); if (*outbytes == NULL) { - Py_DECREF(substring); - return -1; + goto done; } out = PyBytes_AS_STRING(*outbytes); } @@ -7506,12 +7652,10 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, const Py_ssize_t n = PyBytes_Size(*outbytes); if (outsize > PY_SSIZE_T_MAX - n) { PyErr_NoMemory(); - Py_DECREF(substring); - return -1; + goto done; } if (_PyBytes_Resize(outbytes, n + outsize) < 0) { - Py_DECREF(substring); - return -1; + goto done; } out = PyBytes_AS_STRING(*outbytes) + n; } @@ -7521,19 +7665,29 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, p, (int)size, out, outsize, NULL, pusedDefaultChar); - Py_CLEAR(substring); if (outsize <= 0) goto error; - if (pusedDefaultChar && *pusedDefaultChar) - return -2; - return 0; + if (pusedDefaultChar && *pusedDefaultChar) { + ret = -2; + goto done; + } + ret = 0; + +done: +#if USE_UNICODE_WCHAR_CACHE + Py_DECREF(substring); +#else /* USE_UNICODE_WCHAR_CACHE */ + PyMem_Free(p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + return ret; error: - Py_XDECREF(substring); - if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) - return -2; + if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) { + ret = -2; + goto done; + } PyErr_SetFromWindowsErr(0); - return -1; + goto done; } /* @@ -11019,10 +11173,11 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) Py_ssize_t i; int kind; Py_UCS4 chr; - const unsigned char *ustr = (const unsigned char *)str; assert(_PyUnicode_CHECK(uni)); +#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_IS_READY(uni)) { + const unsigned char *ustr = (const unsigned char *)str; const wchar_t *ws = _PyUnicode_WSTR(uni); /* Compare Unicode string and source character set string */ for (i = 0; (chr = ws[i]) && ustr[i]; i++) { @@ -11037,6 +11192,7 @@ 
PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) return -1; /* str is longer */ return 0; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ kind = PyUnicode_KIND(uni); if (kind == PyUnicode_1BYTE_KIND) { const void *data = PyUnicode_1BYTE_DATA(uni); @@ -11074,6 +11230,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) } } +#if HAVE_UNICODE_WCHAR_CACHE static int non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) { @@ -11091,6 +11248,7 @@ non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) } return 1; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ int _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) @@ -11103,11 +11261,13 @@ _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) assert((unsigned char)*p < 128); } #endif +#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(unicode) == -1) { /* Memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(unicode, str); } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(unicode)) return 0; len = (size_t)PyUnicode_GET_LENGTH(unicode); @@ -11129,11 +11289,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) } #endif +#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(left) == -1) { /* memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(left, right->string); } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(left)) return 0; @@ -11403,7 +11565,7 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) } /* -Wraps stringlib_parse_args_finds() and additionally ensures that the +Wraps asciilib_parse_args_finds() and additionally ensures that the first argument is a unicode object. 
*/ @@ -11412,7 +11574,7 @@ parse_args_finds_unicode(const char * function_name, PyObject *args, PyObject **substring, Py_ssize_t *start, Py_ssize_t *end) { - if(stringlib_parse_args_finds(function_name, args, substring, + if(asciilib_parse_args_finds(function_name, args, substring, start, end)) { if (ensure_unicode(*substring) < 0) return 0; @@ -13387,7 +13549,7 @@ unicode_startswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) + if (!asciilib_parse_args_finds("startswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13441,7 +13603,7 @@ unicode_endswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) + if (!asciilib_parse_args_finds("endswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13875,10 +14037,12 @@ unicode_sizeof_impl(PyObject *self) size += (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self); } +#if HAVE_UNICODE_WCHAR_CACHE /* If the wstr pointer is present, account for it unless it is shared with the data pointer. Check if the data is not shared. 
*/ if (_PyUnicode_HAS_WSTR_MEMORY(self)) size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(self)) size += PyUnicode_UTF8_LENGTH(self) + 1; @@ -15043,8 +15207,8 @@ static PyObject * unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *unicode, *self; - Py_ssize_t length, char_size; - int share_wstr, share_utf8; + Py_ssize_t length; + int share_utf8; unsigned int kind; void *data; @@ -15078,37 +15242,35 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; _PyUnicode_STATE(self).ready = 1; - _PyUnicode_WSTR(self) = NULL; _PyUnicode_UTF8_LENGTH(self) = 0; _PyUnicode_UTF8(self) = NULL; +#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR_LENGTH(self) = 0; + _PyUnicode_WSTR(self) = NULL; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_DATA_ANY(self) = NULL; share_utf8 = 0; - share_wstr = 0; +#if HAVE_UNICODE_WCHAR_CACHE + int share_wstr = 0; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (kind == PyUnicode_1BYTE_KIND) { - char_size = 1; if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) share_utf8 = 1; } - else if (kind == PyUnicode_2BYTE_KIND) { - char_size = 2; - if (sizeof(wchar_t) == 2) - share_wstr = 1; - } +#if HAVE_UNICODE_WCHAR_CACHE else { - assert(kind == PyUnicode_4BYTE_KIND); - char_size = 4; - if (sizeof(wchar_t) == 4) + if (sizeof(wchar_t) == kind) share_wstr = 1; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the length. 
*/ - if (length > (PY_SSIZE_T_MAX / char_size - 1)) { + if (length > (PY_SSIZE_T_MAX / kind - 1)) { PyErr_NoMemory(); goto onError; } - data = PyObject_MALLOC((length + 1) * char_size); + data = PyObject_MALLOC((length + 1) * kind); if (data == NULL) { PyErr_NoMemory(); goto onError; @@ -15119,10 +15281,12 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_UTF8_LENGTH(self) = length; _PyUnicode_UTF8(self) = data; } +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR_LENGTH(self) = length; _PyUnicode_WSTR(self) = (wchar_t *)data; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1)); @@ -15449,7 +15613,7 @@ unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), it->it_seq, it->it_index); } else { - PyObject *u = (PyObject *)_PyUnicode_New(0); + PyObject *u = PyUnicode_New(0, 0); if (u == NULL) return NULL; return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u); @@ -15631,30 +15795,12 @@ Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c) Py_UNICODE* PyUnicode_AsUnicodeCopy(PyObject *unicode) { - Py_UNICODE *u, *copy; - Py_ssize_t len, size; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - u = PyUnicode_AsUnicodeAndSize(unicode, &len); - if (u == NULL) - return NULL; - /* Ensure we won't overflow the size. 
*/ - if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { - PyErr_NoMemory(); - return NULL; - } - size = len + 1; /* copy the null character */ - size *= sizeof(Py_UNICODE); - copy = PyMem_Malloc(size); - if (copy == NULL) { - PyErr_NoMemory(); + Py_ssize_t len; +#if USE_UNICODE_WCHAR_CACHE + if (PyUnicode_AsUnicode(unicode) == NULL) return NULL; - } - memcpy(copy, u, size); - return copy; +#endif /* USE_UNICODE_WCHAR_CACHE */ + return PyUnicode_AsWideCharString(unicode, &len); } /* A _string module, to export formatter_parser and formatter_field_name_split diff --git a/PC/_msi.c b/PC/_msi.c index ae30acbc9b48d4..0cd5398ec67552 100644 --- a/PC/_msi.c +++ b/PC/_msi.c @@ -421,11 +421,21 @@ record_setstring(msiobj* record, PyObject *args) { int status; int field; + PyObject *data_obj; wchar_t *data; - if (!PyArg_ParseTuple(args, "iu:SetString", &field, &data)) + if (!PyArg_ParseTuple(args, "iU:SetString", &field, &data_obj)) return NULL; +#if USE_UNICODE_WCHAR_CACHE + data = (wchar_t *)_PyUnicode_AsUnicode(data_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + data = PyUnicode_AsWideCharString(data_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (data == NULL) { + return NULL; + } + if ((status = MsiRecordSetStringW(record->h, field, data)) != ERROR_SUCCESS) return msierror(status); @@ -622,12 +632,19 @@ summary_setproperty(msiobj* si, PyObject *args) return NULL; if (PyUnicode_Check(data)) { +#if USE_UNICODE_WCHAR_CACHE const WCHAR *value = _PyUnicode_AsUnicode(data); +#else /* USE_UNICODE_WCHAR_CACHE */ + WCHAR *value = PyUnicode_AsWideCharString(data, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { return NULL; } status = MsiSummaryInfoSetPropertyW(si->h, field, VT_LPSTR, 0, NULL, value); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(value); +#endif /* USE_UNICODE_WCHAR_CACHE */ } else if (PyLong_CheckExact(data)) { long value = PyLong_AsLong(data); if (value == -1 && PyErr_Occurred()) { diff --git a/PC/clinic/winreg.c.h 
b/PC/clinic/winreg.c.h index 50210250ed1967..666108e481d292 100644 --- a/PC/clinic/winreg.c.h +++ b/PC/clinic/winreg.c.h @@ -152,8 +152,27 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs HKEY key; HKEY _return_value; - if (!_PyArg_ParseStack(args, nargs, "ZO&:ConnectRegistry", - &computer_name, clinic_HKEY_converter, &key)) { + if (!_PyArg_CheckPositional("ConnectRegistry", nargs, 2, 2)) { + goto exit; + } + if (args[0] == Py_None) { + computer_name = NULL; + } + else if (PyUnicode_Check(args[0])) { + #if USE_UNICODE_WCHAR_CACHE + computer_name = _PyUnicode_AsUnicode(args[0]); + #else /* USE_UNICODE_WCHAR_CACHE */ + computer_name = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (computer_name == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("ConnectRegistry", 1, "str or None", args[0]); + goto exit; + } + if (!clinic_HKEY_converter(args[1], &key)) { goto exit; } _return_value = winreg_ConnectRegistry_impl(module, computer_name, key); @@ -163,6 +182,11 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for computer_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)computer_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -199,8 +223,27 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *sub_key; HKEY _return_value; - if (!_PyArg_ParseStack(args, nargs, "O&Z:CreateKey", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("CreateKey", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + sub_key = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + 
#endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("CreateKey", 2, "str or None", args[1]); goto exit; } _return_value = winreg_CreateKey_impl(module, key, sub_key); @@ -210,6 +253,11 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -251,7 +299,7 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:CreateKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:CreateKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -259,7 +307,7 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_CreateKeyEx_impl(module, key, sub_key, reserved, access); @@ -269,6 +317,11 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -303,13 +356,32 @@ winreg_DeleteKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *sub_key; - if (!_PyArg_ParseStack(args, nargs, "O&u:DeleteKey", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("DeleteKey", nargs, 2, 
2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("DeleteKey", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { goto exit; } return_value = winreg_DeleteKey_impl(module, key, sub_key); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -351,19 +423,24 @@ winreg_DeleteKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "access", "reserved", NULL}; - static _PyArg_Parser _parser = {"O&u|ii:DeleteKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:DeleteKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; REGSAM access = KEY_WOW64_64KEY; int reserved = 0; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &access, &reserved)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Converter, &sub_key, &access, &reserved)) { goto exit; } return_value = winreg_DeleteKeyEx_impl(module, key, sub_key, access, reserved); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -391,13 +468,37 @@ winreg_DeleteValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *value; - if (!_PyArg_ParseStack(args, nargs, "O&Z:DeleteValue", - clinic_HKEY_converter, &key, &value)) { + if (!_PyArg_CheckPositional("DeleteValue", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { 
+ value = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + value = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + value = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (value == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("DeleteValue", 2, "str or None", args[1]); goto exit; } return_value = winreg_DeleteValue_impl(module, key, value); exit: + /* Cleanup for value */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)value); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -527,12 +628,26 @@ winreg_ExpandEnvironmentStrings(PyObject *module, PyObject *arg) PyObject *return_value = NULL; const Py_UNICODE *string; - if (!PyArg_Parse(arg, "u:ExpandEnvironmentStrings", &string)) { + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("ExpandEnvironmentStrings", 0, "str", arg); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + string = _PyUnicode_AsUnicode(arg); + #else /* USE_UNICODE_WCHAR_CACHE */ + string = PyUnicode_AsWideCharString(arg, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (string == NULL) { goto exit; } return_value = winreg_ExpandEnvironmentStrings_impl(module, string); exit: + /* Cleanup for string */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)string); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -619,13 +734,48 @@ winreg_LoadKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *sub_key; const Py_UNICODE *file_name; - if (!_PyArg_ParseStack(args, nargs, "O&uu:LoadKey", - clinic_HKEY_converter, &key, &sub_key, &file_name)) { + if (!_PyArg_CheckPositional("LoadKey", nargs, 3, 3)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("LoadKey", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE 
*/ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + if (!PyUnicode_Check(args[2])) { + _PyArg_BadArgument("LoadKey", 3, "str", args[2]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + file_name = _PyUnicode_AsUnicode(args[2]); + #else /* USE_UNICODE_WCHAR_CACHE */ + file_name = PyUnicode_AsWideCharString(args[2], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (file_name == NULL) { goto exit; } return_value = winreg_LoadKey_impl(module, key, sub_key, file_name); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for file_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)file_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -660,7 +810,7 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:OpenKey", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:OpenKey", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -668,7 +818,7 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_OpenKey_impl(module, key, sub_key, reserved, access); @@ -678,6 +828,11 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + 
return return_value; } @@ -712,7 +867,7 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:OpenKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:OpenKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -720,7 +875,7 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_OpenKeyEx_impl(module, key, sub_key, reserved, access); @@ -730,6 +885,11 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -802,13 +962,37 @@ winreg_QueryValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *sub_key; - if (!_PyArg_ParseStack(args, nargs, "O&Z:QueryValue", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("QueryValue", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + sub_key = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("QueryValue", 2, "str or None", args[1]); goto exit; } 
return_value = winreg_QueryValue_impl(module, key, sub_key); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -841,13 +1025,37 @@ winreg_QueryValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *name; - if (!_PyArg_ParseStack(args, nargs, "O&Z:QueryValueEx", - clinic_HKEY_converter, &key, &name)) { + if (!_PyArg_CheckPositional("QueryValueEx", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + name = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + name = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + name = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (name == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("QueryValueEx", 2, "str or None", args[1]); goto exit; } return_value = winreg_QueryValueEx_impl(module, key, name); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -885,13 +1093,32 @@ winreg_SaveKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *file_name; - if (!_PyArg_ParseStack(args, nargs, "O&u:SaveKey", - clinic_HKEY_converter, &key, &file_name)) { + if (!_PyArg_CheckPositional("SaveKey", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("SaveKey", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + file_name = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + file_name = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (file_name == NULL) { goto exit; } return_value = winreg_SaveKey_impl(module, 
key, file_name); exit: + /* Cleanup for file_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)file_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -925,25 +1152,23 @@ PyDoc_STRVAR(winreg_SetValue__doc__, {"SetValue", (PyCFunction)(void(*)(void))winreg_SetValue, METH_FASTCALL, winreg_SetValue__doc__}, static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, - DWORD type, const Py_UNICODE *value, - Py_ssize_clean_t value_length); +winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, + DWORD type, PyObject *value_obj); static PyObject * winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; HKEY key; - const Py_UNICODE *sub_key; + PyObject *sub_key_obj; DWORD type; - const Py_UNICODE *value; - Py_ssize_clean_t value_length; + PyObject *value_obj; - if (!_PyArg_ParseStack(args, nargs, "O&Zku#:SetValue", - clinic_HKEY_converter, &key, &sub_key, &type, &value, &value_length)) { + if (!_PyArg_ParseStack(args, nargs, "O&OkU:SetValue", + clinic_HKEY_converter, &key, &sub_key_obj, &type, &value_obj)) { goto exit; } - return_value = winreg_SetValue_impl(module, key, sub_key, type, value, value_length); + return_value = winreg_SetValue_impl(module, key, sub_key_obj, type, value_obj); exit: return return_value; @@ -1010,13 +1235,18 @@ winreg_SetValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) DWORD type; PyObject *value; - if (!_PyArg_ParseStack(args, nargs, "O&ZOkO:SetValueEx", - clinic_HKEY_converter, &key, &value_name, &reserved, &type, &value)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&OkO:SetValueEx", + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &value_name, &reserved, &type, &value)) { goto exit; } return_value = winreg_SetValueEx_impl(module, key, value_name, reserved, type, value); exit: + /* Cleanup for value_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void 
*)value_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1121,4 +1351,4 @@ winreg_QueryReflectionKey(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=1204d20c543b5b4a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d32916ff805bd452 input=a9049054013a1b77]*/ diff --git a/PC/winreg.c b/PC/winreg.c index 3a6ea3689fd12a..cc4c75a8f7526b 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -621,16 +621,22 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) for (j = 0; j < i; j++) { PyObject *t; - wchar_t *wstr; Py_ssize_t len; t = PyList_GET_ITEM(value, j); if (!PyUnicode_Check(t)) return FALSE; - wstr = PyUnicode_AsUnicodeAndSize(t, &len); - if (wstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + len = PyUnicode_GetSize(t); + if (len < 0) return FALSE; - size += Py_SAFE_DOWNCAST((len + 1) * sizeof(wchar_t), + len++; +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(t, NULL, 0); + if (len < 0) + return FALSE; +#endif /* USE_UNICODE_WCHAR_CACHE */ + size += Py_SAFE_DOWNCAST(len * sizeof(wchar_t), size_t, DWORD); } @@ -646,17 +652,18 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) for (j = 0; j < i; j++) { PyObject *t; - wchar_t *wstr; Py_ssize_t len; t = PyList_GET_ITEM(value, j); - wstr = PyUnicode_AsUnicodeAndSize(t, &len); - assert(wstr); - wcscpy(P, wstr); - P += (len + 1); + assert(size > 0); + len = PyUnicode_AsWideChar(t, P, size); + assert(len >= 0); + assert(len < size); + size -= (DWORD)len + 1; + P += len + 1; } /* And doubly-terminate the list... */ - *P = '\0'; + *P = L'\0'; break; } case REG_BINARY: @@ -1573,12 +1580,12 @@ winreg.SetValue key: HKEY An already open key, or any one of the predefined HKEY_* constants. - sub_key: Py_UNICODE(accept={str, NoneType}) + sub_key as sub_key_obj: object A string that names the subkey with which the value is associated. 
type: DWORD An integer that specifies the type of the data. Currently this must be REG_SZ, meaning only strings are supported. - value: Py_UNICODE(zeroes=True) + value as value_obj: unicode A string that specifies the new value. / @@ -1596,11 +1603,13 @@ KEY_SET_VALUE access. [clinic start generated code]*/ static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, - DWORD type, const Py_UNICODE *value, - Py_ssize_clean_t value_length) -/*[clinic end generated code: output=686bedb1cbb4367b input=2cd2adab79339c53]*/ +winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, + DWORD type, PyObject *value_obj) +/*[clinic end generated code: output=e1c0674b77ba195b input=525af06a623c9ad9]*/ { + wchar_t *sub_key = NULL; + wchar_t *value = NULL; + Py_ssize_t value_length; long rc; if (type != REG_SZ) { @@ -1609,9 +1618,41 @@ winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, return NULL; } +#if USE_UNICODE_WCHAR_CACHE + if (!_PyUnicode_UNICODE_Converter(sub_key_obj, &sub_key)) { + return NULL; + } + value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); + if (value == NULL) { + return NULL; + } +#else /* USE_UNICODE_WCHAR_CACHE */ + if (!_PyUnicode_WideCharString_Converter(sub_key_obj, &sub_key)) { + return NULL; + } + value = PyUnicode_AsWideCharString(value_obj, &value_length); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (value == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(sub_key); +#endif /* USE_UNICODE_WCHAR_CACHE */ + return NULL; + } + if ((Py_ssize_t)(DWORD)value_length != value_length) { + PyErr_SetString(PyExc_OverflowError, "too long string"); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(sub_key); +#endif /* USE_UNICODE_WCHAR_CACHE */ + return NULL; + } + Py_BEGIN_ALLOW_THREADS - rc = RegSetValueW(key, sub_key, REG_SZ, value, value_length+1); + rc = RegSetValueW(key, sub_key, REG_SZ, value, (DWORD)value_length+1); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + 
PyMem_Free(value); + PyMem_Free(sub_key); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegSetValue"); Py_RETURN_NONE; diff --git a/Python/dynload_win.c b/Python/dynload_win.c index 36918c3579d92d..050c561d3d8d3a 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -190,13 +190,16 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, { dl_funcptr p; char funcname[258], *import_python; - const wchar_t *wpathname; #ifndef _DEBUG _Py_CheckPython3(); #endif - wpathname = _PyUnicode_AsUnicode(pathname); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpathname = _PyUnicode_AsUnicode(pathname); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpathname = PyUnicode_AsWideCharString(pathname, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpathname == NULL) return NULL; @@ -225,6 +228,9 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, #if HAVE_SXS _Py_DeactivateActCtx(cookie); #endif +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpathname); +#endif /* USE_UNICODE_WCHAR_CACHE */ /* restore old error mode settings */ SetErrorMode(old_mode); diff --git a/Python/fileutils.c b/Python/fileutils.c index 75e015afaec32b..44961b08afbb9b 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1003,15 +1003,21 @@ _Py_stat(PyObject *path, struct stat *statbuf) #ifdef MS_WINDOWS int err; struct _stat wstatbuf; - const wchar_t *wpath; - wpath = _PyUnicode_AsUnicode(path); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpath = _PyUnicode_AsUnicode(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return -2; err = _wstat(wpath, &wstatbuf); if (!err) statbuf->st_mode = wstatbuf.st_mode; +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ return err; #else int ret; @@ -1395,7 +1401,6 @@ _Py_fopen_obj(PyObject *path, const char *mode) FILE 
*f; int async_err = 0; #ifdef MS_WINDOWS - const wchar_t *wpath; wchar_t wmode[10]; int usize; @@ -1407,7 +1412,11 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_TYPE(path)); return NULL; } - wpath = _PyUnicode_AsUnicode(path); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpath = _PyUnicode_AsUnicode(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return NULL; @@ -1424,6 +1433,9 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_END_ALLOW_THREADS } while (f == NULL && errno == EINTR && !(async_err = PyErr_CheckSignals())); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ #else PyObject *bytes; char *path_bytes; diff --git a/Python/getargs.c b/Python/getargs.c index 693a29cced4239..c01042c3e7771f 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1070,6 +1070,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'u': /* raw unicode buffer (Py_UNICODE *) */ case 'Z': /* raw unicode buffer or None */ { +#if HAVE_UNICODE_WCHAR_CACHE Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); if (*format == '#') { @@ -1108,6 +1109,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return converterr(c == 'Z' ? 
"str or None" : "str", arg, msgbuf, bufsize); } +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_Format(PyExc_SystemError, + "unsupported PyArg_Parse format: \"%s\"", + format-1); + RETURN_ERR_OCCURRED; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ break; } diff --git a/Python/traceback.c b/Python/traceback.c index bd1061ed43b1e1..709be52de7243d 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -674,7 +674,9 @@ _Py_DumpASCII(int fd, PyObject *text) int truncated; int kind; void *data = NULL; +#if USE_UNICODE_WCHAR_CACHE wchar_t *wstr = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ Py_UCS4 ch; if (!PyUnicode_Check(text)) @@ -682,13 +684,16 @@ _Py_DumpASCII(int fd, PyObject *text) size = ascii->length; kind = ascii->state.kind; +#if USE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { wstr = ((PyASCIIObject *)text)->wstr; if (wstr == NULL) return; size = ((PyCompactUnicodeObject *)text)->wstr_length; } - else if (ascii->state.compact) { + else +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (ascii->state.compact) { if (ascii->state.ascii) data = ((PyASCIIObject*)text) + 1; else @@ -709,10 +714,12 @@ _Py_DumpASCII(int fd, PyObject *text) } for (i=0; i < size; i++) { - if (kind != PyUnicode_WCHAR_KIND) - ch = PyUnicode_READ(kind, data, i); - else +#if USE_UNICODE_WCHAR_CACHE + if (kind == PyUnicode_WCHAR_KIND) ch = wstr[i]; + else +#endif /* USE_UNICODE_WCHAR_CACHE */ + ch = PyUnicode_READ(kind, data, i); if (' ' <= ch && ch <= 126) { /* printable ASCII character */ char c = (char)ch; diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index cb2ded4649dce2..2969b4b44431c8 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -3351,20 +3351,75 @@ def parse_arg(self, argname, argnum): """.format(argname=argname, paramname=self.name, argnum=argnum) return super().parse_arg(argname, argnum) +@add_legacy_c_converter('u') @add_legacy_c_converter('u#', zeroes=True) @add_legacy_c_converter('Z', accept={str, NoneType}) @add_legacy_c_converter('Z#', 
accept={str, NoneType}, zeroes=True) class Py_UNICODE_converter(CConverter): type = 'const Py_UNICODE *' default_type = (str, Null, NoneType) - format_unit = 'u' def converter_init(self, *, accept={str}, zeroes=False): format_unit = 'Z' if accept=={str, NoneType} else 'u' if zeroes: format_unit += '#' self.length = True - self.format_unit = format_unit + self.format_unit = format_unit + else: + self.accept = accept + if accept == {str}: + self.converter = '_PyUnicode_WideCharString_Converter' + elif accept == {str, NoneType}: + self.converter = '_PyUnicode_WideCharString_Opt_Converter' + else: + fail("Py_UNICODE_converter: illegal 'accept' argument " + repr(accept)) + + def cleanup(self): + if not self.length: + return """\ +#if !USE_UNICODE_WCHAR_CACHE +PyMem_Free((void *){name}); +#endif /* USE_UNICODE_WCHAR_CACHE */ +""".format(name=self.name) + + def parse_arg(self, argname, argnum): + if not self.length: + if self.accept == {str}: + return """ + if (!PyUnicode_Check({argname})) {{{{ + _PyArg_BadArgument("{{name}}", {argnum}, "str", {argname}); + goto exit; + }}}} + #if USE_UNICODE_WCHAR_CACHE + {paramname} = _PyUnicode_AsUnicode({argname}); + #else /* USE_UNICODE_WCHAR_CACHE */ + {paramname} = PyUnicode_AsWideCharString({argname}, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if ({paramname} == NULL) {{{{ + goto exit; + }}}} + """.format(argname=argname, paramname=self.name, argnum=argnum) + elif self.accept == {str, NoneType}: + return """ + if ({argname} == Py_None) {{{{ + {paramname} = NULL; + }}}} + else if (PyUnicode_Check({argname})) {{{{ + #if USE_UNICODE_WCHAR_CACHE + {paramname} = _PyUnicode_AsUnicode({argname}); + #else /* USE_UNICODE_WCHAR_CACHE */ + {paramname} = PyUnicode_AsWideCharString({argname}, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if ({paramname} == NULL) {{{{ + goto exit; + }}}} + }}}} + else {{{{ + _PyArg_BadArgument("{{name}}", {argnum}, "str or None", {argname}); + goto exit; + }}}} + """.format(argname=argname, 
paramname=self.name, argnum=argnum) + return super().parse_arg(argname, argnum) @add_legacy_c_converter('s*', accept={str, buffer}) @add_legacy_c_converter('z*', accept={str, buffer, NoneType}) From 60b89c9a7425ab99919d5f7ce9b71397ebe4c653 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Mar 2019 11:48:43 +0200 Subject: [PATCH 02/17] Fix winreg.SetValue(). --- PC/clinic/winreg.c.h | 17 +++++++++++------ PC/winreg.c | 27 ++++++--------------------- 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/PC/clinic/winreg.c.h b/PC/clinic/winreg.c.h index 666108e481d292..cb7a4564713e55 100644 --- a/PC/clinic/winreg.c.h +++ b/PC/clinic/winreg.c.h @@ -1152,7 +1152,7 @@ PyDoc_STRVAR(winreg_SetValue__doc__, {"SetValue", (PyCFunction)(void(*)(void))winreg_SetValue, METH_FASTCALL, winreg_SetValue__doc__}, static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, +winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, DWORD type, PyObject *value_obj); static PyObject * @@ -1160,17 +1160,22 @@ winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; HKEY key; - PyObject *sub_key_obj; + const Py_UNICODE *sub_key; DWORD type; PyObject *value_obj; - if (!_PyArg_ParseStack(args, nargs, "O&OkU:SetValue", - clinic_HKEY_converter, &key, &sub_key_obj, &type, &value_obj)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&kU:SetValue", + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &type, &value_obj)) { goto exit; } - return_value = winreg_SetValue_impl(module, key, sub_key_obj, type, value_obj); + return_value = winreg_SetValue_impl(module, key, sub_key, type, value_obj); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1351,4 +1356,4 @@ winreg_QueryReflectionKey(PyObject *module, PyObject *arg) exit: return 
return_value; } -/*[clinic end generated code: output=d32916ff805bd452 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d710dde7327c59e7 input=a9049054013a1b77]*/ diff --git a/PC/winreg.c b/PC/winreg.c index cc4c75a8f7526b..20d298b37e22f8 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -1580,7 +1580,7 @@ winreg.SetValue key: HKEY An already open key, or any one of the predefined HKEY_* constants. - sub_key as sub_key_obj: object + sub_key: Py_UNICODE(accept={str, NoneType}) A string that names the subkey with which the value is associated. type: DWORD An integer that specifies the type of the data. Currently this must @@ -1603,12 +1603,10 @@ KEY_SET_VALUE access. [clinic start generated code]*/ static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, +winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, DWORD type, PyObject *value_obj) -/*[clinic end generated code: output=e1c0674b77ba195b input=525af06a623c9ad9]*/ +/*[clinic end generated code: output=d4773dc9c372311a input=bf088494ae2d24fd]*/ { - wchar_t *sub_key = NULL; - wchar_t *value = NULL; Py_ssize_t value_length; long rc; @@ -1619,29 +1617,17 @@ winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, } #if USE_UNICODE_WCHAR_CACHE - if (!_PyUnicode_UNICODE_Converter(sub_key_obj, &sub_key)) { - return NULL; - } - value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); - if (value == NULL) { - return NULL; - } + const wchar_t *value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); #else /* USE_UNICODE_WCHAR_CACHE */ - if (!_PyUnicode_WideCharString_Converter(sub_key_obj, &sub_key)) { - return NULL; - } - value = PyUnicode_AsWideCharString(value_obj, &value_length); + wchar_t *value = PyUnicode_AsWideCharString(value_obj, &value_length); #endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { -#if !USE_UNICODE_WCHAR_CACHE - PyMem_Free(sub_key); -#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } if 
((Py_ssize_t)(DWORD)value_length != value_length) { PyErr_SetString(PyExc_OverflowError, "too long string"); #if !USE_UNICODE_WCHAR_CACHE - PyMem_Free(sub_key); + PyMem_Free(value); #endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } @@ -1651,7 +1637,6 @@ winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, Py_END_ALLOW_THREADS #if !USE_UNICODE_WCHAR_CACHE PyMem_Free(value); - PyMem_Free(sub_key); #endif /* USE_UNICODE_WCHAR_CACHE */ if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegSetValue"); From f386b63e143e0b2d507957695dc2f3a2d9054bc6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Mar 2019 12:02:51 +0200 Subject: [PATCH 03/17] Clean up some ifdefs in _testcapimodule. --- Modules/_testcapimodule.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index ddbc3dc80e1a19..b8b0392a3dc155 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1410,7 +1410,6 @@ getargs_y_hash(PyObject *self, PyObject *args) return PyBytes_FromStringAndSize(str, size); } -//#if USE_UNICODE_WCHAR_CACHE static PyObject * getargs_u(PyObject *self, PyObject *args) { @@ -1454,7 +1453,6 @@ getargs_Z_hash(PyObject *self, PyObject *args) else Py_RETURN_NONE; } -// #endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * getargs_es(PyObject *self, PyObject *args) @@ -4817,12 +4815,10 @@ static PyMethodDef TestMethods[] = { {"getargs_y", getargs_y, METH_VARARGS}, {"getargs_y_star", getargs_y_star, METH_VARARGS}, {"getargs_y_hash", getargs_y_hash, METH_VARARGS}, -// #if USE_UNICODE_WCHAR_CACHE {"getargs_u", getargs_u, METH_VARARGS}, {"getargs_u_hash", getargs_u_hash, METH_VARARGS}, {"getargs_Z", getargs_Z, METH_VARARGS}, {"getargs_Z_hash", getargs_Z_hash, METH_VARARGS}, -// #endif /* USE_UNICODE_WCHAR_CACHE */ {"getargs_w_star", getargs_w_star, METH_VARARGS}, {"getargs_es", getargs_es, METH_VARARGS}, {"getargs_et", getargs_et, METH_VARARGS}, @@ -4846,8 +4842,6 
@@ static PyMethodDef TestMethods[] = { #if USE_UNICODE_WCHAR_CACHE {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, -#endif /* USE_UNICODE_WCHAR_CACHE */ -#if USE_UNICODE_WCHAR_CACHE {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, #endif /* USE_UNICODE_WCHAR_CACHE */ {"_test_thread_state", test_thread_state, METH_VARARGS}, From 11e0e0c7baa1b6ae8d2815d5767f7c296c4e7f7d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Mar 2019 12:17:39 +0200 Subject: [PATCH 04/17] Make path_cleanup() paranoidally safer. --- Modules/posixmodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f225d70f91cbaa..0bde61a357e273 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -901,8 +901,9 @@ static void path_cleanup(path_t *path) { #if !USE_UNICODE_WCHAR_CACHE - PyMem_Free((wchar_t *)path->wide); + wchar_t *wide = (wchar_t *)path->wide; path->wide = NULL; + PyMem_Free(wide); #endif /* USE_UNICODE_WCHAR_CACHE */ Py_CLEAR(path->object); Py_CLEAR(path->cleanup); From 8750d48b6de939a4ece79037519c21196c0a35ed Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 20 Mar 2019 21:37:04 +0200 Subject: [PATCH 05/17] Fix os.scandir(). --- Modules/posixmodule.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 0bde61a357e273..b36a7a7f2ef1a3 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -12236,11 +12236,7 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) result = fstatat(self->dir_fd, path, &st, follow_symlinks ? 
0 : AT_SYMLINK_NOFOLLOW); #else -#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE - PyMem_Free(path); -#else /* USE_UNICODE_WCHAR_CACHE */ Py_DECREF(ub); -#endif /* USE_UNICODE_WCHAR_CACHE */ PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat"); return NULL; #endif /* HAVE_FSTATAT */ @@ -13053,9 +13049,9 @@ os_scandir_impl(PyObject *module, path_t *path) #else /* POSIX */ errno = 0; #ifdef HAVE_FDOPENDIR - if (path->fd != -1) { + if (iterator->path.fd != -1) { /* closedir() closes the FD, so we duplicate it */ - fd = _Py_dup(path->fd); + fd = _Py_dup(iterator->path.fd); if (fd == -1) goto error; From 3b5294a2514218c8288ae4152acba1323aa111d7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 Jun 2020 09:25:51 +0300 Subject: [PATCH 06/17] Silence compiler warnings. --- Modules/arraymodule.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 3a978d1ace14c2..191d23346be369 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1692,10 +1692,13 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *arg) } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS len = PyUnicode_GetSize(arg); if (len < 0) { return NULL; } +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ len = PyUnicode_AsWideChar(arg, NULL, 0); if (len < 0) { @@ -2695,6 +2698,8 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) else if (initial != NULL && PyUnicode_Check(initial)) { Py_ssize_t n; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Py_UNICODE *ustr; ustr = PyUnicode_AsUnicode(initial); @@ -2719,6 +2724,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) memcpy(item, ustr, n); self->allocated = Py_SIZE(self); } +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ n = PyUnicode_AsWideChar(initial, NULL, 0); if (n < 0) { From e7898fa7d4c41c452bb283f1e41338dc18c3206d Mon Sep 17 00:00:00 2001 From: Serhiy 
Storchaka Date: Tue, 30 Jun 2020 09:43:47 +0300 Subject: [PATCH 07/17] Fix PyUnicode_IsIdentifier for the cache-less build. --- Lib/test/test_unicode.py | 2 ++ Objects/unicodeobject.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index cf27d6750b8534..e47a416d5370f7 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -721,6 +721,7 @@ def test_isidentifier(self): self.assertFalse("0".isidentifier()) @support.cpython_only + @support.requires_legacy_unicode_capi def test_isidentifier_legacy(self): import _testcapi u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊' @@ -2346,6 +2347,7 @@ def test_getnewargs(self): self.assertEqual(len(args), 1) @support.cpython_only + @support.requires_legacy_unicode_capi def test_resize(self): from _testcapi import getargs_u for length in range(1, 100, 7): diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bcc75ef6885ff4..6a2ad437e87bd3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12549,11 +12549,14 @@ _PyUnicode_ScanIdentifier(PyObject *self) int PyUnicode_IsIdentifier(PyObject *self) { +#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_IS_READY(self)) { +#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_ssize_t i = _PyUnicode_ScanIdentifier(self); Py_ssize_t len = PyUnicode_GET_LENGTH(self); /* an empty string is not a valid identifier */ return len && i == len; +#if HAVE_UNICODE_WCHAR_CACHE } else { _Py_COMP_DIAG_PUSH @@ -12597,6 +12600,7 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS return 1; _Py_COMP_DIAG_POP } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /*[clinic input] From fd641c69a44883fc54a9bae1c474d949806392f2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 Jun 2020 10:27:42 +0300 Subject: [PATCH 08/17] Silence compiler warnings on Windows. 
--- Modules/_ctypes/callproc.c | 3 +++ Modules/_io/fileio.c | 3 +++ Modules/overlapped.c | 11 ++++++++++- Modules/posixmodule.c | 12 ++++++++++++ Objects/unicodeobject.c | 6 ++++++ PC/_msi.c | 6 ++++++ PC/winreg.c | 6 ++++++ Python/dynload_win.c | 3 +++ Python/fileutils.c | 6 ++++++ 9 files changed, 55 insertions(+), 1 deletion(-) diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index ce1d41c9079088..b4a493ba47d1fd 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1309,7 +1309,10 @@ static PyObject *load_library(PyObject *self, PyObject *args) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const WCHAR *name = _PyUnicode_AsUnicode(nameobj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ WCHAR *name = PyUnicode_AsWideCharString(nameobj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index e44e4f30f805c3..b9856b3b631657 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -271,7 +271,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, return -1; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS widename = PyUnicode_AsUnicode(stringobj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ widename = PyUnicode_AsWideCharString(stringobj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Modules/overlapped.c b/Modules/overlapped.c index e7f72b3e070004..95bdfbfdff0393 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -386,7 +386,10 @@ overlapped_CreateEvent(PyObject *self, PyObject *args) } else if (PyUnicode_Check(Name_obj)) { #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Name = (wchar_t *)_PyUnicode_AsUnicode(Name_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ Name = PyUnicode_AsWideCharString(Name_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -395,7 +398,7 @@ 
overlapped_CreateEvent(PyObject *self, PyObject *args) } } else { - _PyArg_BadArgument("CreateEvent", 4, "str or None", Name_obj); + _PyArg_BadArgument("CreateEvent", "argument 4", "str or None", Name_obj); return NULL; } @@ -1256,7 +1259,10 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) return -1; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ Host = PyUnicode_AsWideCharString(Host_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -1284,7 +1290,10 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) return -1; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ Host = PyUnicode_AsWideCharString(Host_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index e83c3477719768..efd99544f5a997 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1081,7 +1081,10 @@ path_converter(PyObject *o, void *p) if (is_unicode) { #ifdef MS_WINDOWS #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_AsUnicodeAndSize(o, &length); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wide = PyUnicode_AsWideCharString(o, &length); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -1179,7 +1182,10 @@ path_converter(PyObject *o, void *p) } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_AsUnicodeAndSize(wo, &length); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wide = PyUnicode_AsWideCharString(wo, &length); Py_DECREF(wo); @@ -12850,7 +12856,10 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) if (!PyUnicode_FSDecoder(self->path, &ub)) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH 
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *path = PyUnicode_AsUnicode(ub); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *path = PyUnicode_AsWideCharString(ub, NULL); Py_DECREF(ub); @@ -13076,9 +13085,12 @@ os_DirEntry_inode_impl(DirEntry *self) if (!PyUnicode_FSDecoder(self->path, &unicode)) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *path = PyUnicode_AsUnicode(unicode); result = LSTAT(path, &stat); Py_DECREF(unicode); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *path = PyUnicode_AsWideCharString(unicode, NULL); Py_DECREF(unicode); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6a2ad437e87bd3..33d359f08788ce 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4468,9 +4468,12 @@ unicode_decode_call_errorhandler_wchar( } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS repwlen = PyUnicode_GetSize(repunicode); if (repwlen < 0) goto onError; +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0); if (repwlen < 0) @@ -7811,11 +7814,14 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, if (substring == NULL) return -1; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS p = PyUnicode_AsUnicodeAndSize(substring, &size); if (p == NULL) { Py_DECREF(substring); return -1; } +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ p = PyUnicode_AsWideCharString(substring, &size); Py_CLEAR(substring); diff --git a/PC/_msi.c b/PC/_msi.c index 5f105ed899f25d..3444c9be849c5d 100644 --- a/PC/_msi.c +++ b/PC/_msi.c @@ -428,7 +428,10 @@ record_setstring(msiobj* record, PyObject *args) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS data = (wchar_t *)_PyUnicode_AsUnicode(data_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ data = 
PyUnicode_AsWideCharString(data_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -636,7 +639,10 @@ summary_setproperty(msiobj* si, PyObject *args) if (PyUnicode_Check(data)) { #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const WCHAR *value = _PyUnicode_AsUnicode(data); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ WCHAR *value = PyUnicode_AsWideCharString(data, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/PC/winreg.c b/PC/winreg.c index 7e9b6d08bfeea4..2581a4694d4156 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -646,10 +646,13 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) if (!PyUnicode_Check(t)) return FALSE; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS len = PyUnicode_GetSize(t); if (len < 0) return FALSE; len++; +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ len = PyUnicode_AsWideChar(t, NULL, 0); if (len < 0) @@ -1707,7 +1710,10 @@ winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *value = PyUnicode_AsWideCharString(value_obj, &value_length); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Python/dynload_win.c b/Python/dynload_win.c index a027c822f188f4..fcd9f6dcbe67ec 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -172,7 +172,10 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, #endif #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *wpathname = _PyUnicode_AsUnicode(pathname); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpathname = PyUnicode_AsWideCharString(pathname, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Python/fileutils.c b/Python/fileutils.c index 
9b60f6eca412e2..be2dd72073f89a 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1010,7 +1010,10 @@ _Py_stat(PyObject *path, struct stat *statbuf) struct _stat wstatbuf; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *wpath = _PyUnicode_AsUnicode(path); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -1454,7 +1457,10 @@ _Py_fopen_obj(PyObject *path, const char *mode) return NULL; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *wpath = _PyUnicode_AsUnicode(path); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ From ed14aa950b07c02eced6b5499e203ca881d5631b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 Jun 2020 10:40:01 +0300 Subject: [PATCH 09/17] Fix compiler warning in _testcapi. 
--- Modules/_testcapimodule.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3bd51983112167..76c22f5f099b37 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1827,6 +1827,10 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); +#if USE_UNICODE_WCHAR_CACHE +/* Ignore use of deprecated APIs */ +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_FromUnicode(invalid, 1); if (wide == NULL) PyErr_Clear(); @@ -1834,10 +1838,6 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail"); -#if USE_UNICODE_WCHAR_CACHE -/* Ignore use of deprecated APIs */ -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_FromUnicode(NULL, 1); if (wide == NULL) return NULL; From 97b5228a01dbfd995a2b815b184ff2f0962b1928 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 5 Jul 2020 20:23:03 +0300 Subject: [PATCH 10/17] Use HAVE_UNICODE_WCHAR_CACHE instead of USE_UNICODE_WCHAR_CACHE in _Py_DumpASCII(). 
--- Python/traceback.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index 1fbb86bd168de5..e183782400556e 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -684,9 +684,9 @@ _Py_DumpASCII(int fd, PyObject *text) int truncated; int kind; void *data = NULL; -#if USE_UNICODE_WCHAR_CACHE +#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr = NULL; -#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_UCS4 ch; if (!PyUnicode_Check(text)) @@ -694,7 +694,7 @@ _Py_DumpASCII(int fd, PyObject *text) size = ascii->length; kind = ascii->state.kind; -#if USE_UNICODE_WCHAR_CACHE +#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { wstr = ((PyASCIIObject *)text)->wstr; if (wstr == NULL) @@ -702,7 +702,7 @@ _Py_DumpASCII(int fd, PyObject *text) size = ((PyCompactUnicodeObject *)text)->wstr_length; } else -#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (ascii->state.compact) { if (ascii->state.ascii) data = ((PyASCIIObject*)text) + 1; @@ -724,11 +724,11 @@ _Py_DumpASCII(int fd, PyObject *text) } for (i=0; i < size; i++) { -#if USE_UNICODE_WCHAR_CACHE +#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) ch = wstr[i]; else -#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ ch = PyUnicode_READ(kind, data, i); if (' ' <= ch && ch <= 126) { /* printable ASCII character */ From d6ba6b7511173ddd75eed88b3cab5ec28898f894 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 9 Jul 2020 11:03:28 +0300 Subject: [PATCH 11/17] Set HAVE_UNICODE_WCHAR_CACHE and USE_UNICODE_WCHAR_CACHE only if they were not set. 
--- Include/cpython/unicodeobject.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 21b87678215e3d..5ac89a1e35e5d9 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -11,8 +11,13 @@ /* --- Internal Unicode Operations ---------------------------------------- */ -#define HAVE_UNICODE_WCHAR_CACHE 1 -#define USE_UNICODE_WCHAR_CACHE 1 +#ifndef HAVE_UNICODE_WCHAR_CACHE +# define HAVE_UNICODE_WCHAR_CACHE 1 +#endif /* HAVE_UNICODE_WCHAR_CACHE */ + +#ifndef USE_UNICODE_WCHAR_CACHE +# define USE_UNICODE_WCHAR_CACHE HAVE_UNICODE_WCHAR_CACHE +#endif /* USE_UNICODE_WCHAR_CACHE */ /* Since splitting on whitespace is an important use case, and whitespace in most situations is solely ASCII whitespace, we From 0da41468ff7db9dc893b75a4c463f9cb0ca3188a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 9 Jul 2020 11:06:12 +0300 Subject: [PATCH 12/17] Remove Py_UNICODE_MATCH. --- Include/cpython/unicodeobject.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 5ac89a1e35e5d9..19571c302b1341 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -81,15 +81,6 @@ Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) { #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) #if HAVE_UNICODE_WCHAR_CACHE -/* Check if substring matches at given offset. The offset must be - valid, and the substring must not be empty. 
*/ - -#define Py_UNICODE_MATCH(string, offset, substring) \ - ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ - ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ - !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - /* --- Unicode Type ------------------------------------------------------- */ /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject From e42590850e33e8af4541a547522166ef96c36e5e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 09:52:31 +0300 Subject: [PATCH 13/17] Fix unterminated #if. --- Include/cpython/unicodeobject.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 19571c302b1341..95ccef38086a8c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -80,7 +80,6 @@ Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) { /* low surrogate = bottom 10 bits added to DC00 */ #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) -#if HAVE_UNICODE_WCHAR_CACHE /* --- Unicode Type ------------------------------------------------------- */ /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject From 8a9259b90d24a1e8bb1c7d92b701001e1c8fc94f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 10:10:09 +0300 Subject: [PATCH 14/17] Reset arraymodule.c. 
--- Modules/arraymodule.c | 126 +++++++++------------------------ Modules/clinic/arraymodule.c.h | 20 +++--- 2 files changed, 42 insertions(+), 104 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 6e2c3717c28253..2ba2ff43aa8b8a 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -241,40 +241,29 @@ u_getitem(arrayobject *ap, Py_ssize_t i) static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { -#if USE_UNICODE_WCHAR_CACHE - Py_UNICODE *p; - Py_ssize_t len; - - if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) + PyObject *u; + if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) { return -1; - if (len != 1) { + } + + Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0); + if (len != 2) { PyErr_SetString(PyExc_TypeError, "array item must be unicode character"); return -1; } - if (i >= 0) - ((Py_UNICODE *)ap->ob_item)[i] = p[0]; - return 0; -#else - PyObject *u; - Py_ssize_t len; - if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) - return -1; - len = PyUnicode_AsWideChar(u, NULL, 0); - if (len != 2) { - if (len >= 0) { - PyErr_SetString(PyExc_TypeError, - "array item must be unicode character"); - } - return -1; + wchar_t w; + len = PyUnicode_AsWideChar(u, &w, 1); + assert(len == 1); + + if (i >= 0) { + ((wchar_t *)ap->ob_item)[i] = w; } - if (i >= 0) - PyUnicode_AsWideChar(u, &((wchar_t *)ap->ob_item)[i], 1); return 0; -#endif /* USE_UNICODE_WCHAR_CACHE */ } + static PyObject * h_getitem(arrayobject *ap, Py_ssize_t i) { @@ -1667,7 +1656,7 @@ array_array_tobytes_impl(arrayobject *self) /*[clinic input] array.array.fromunicode - arg: unicode + ustr: unicode / Extends this array with data from the unicode string ustr. @@ -1678,41 +1667,28 @@ some other type. 
[clinic start generated code]*/ static PyObject * -array_array_fromunicode_impl(arrayobject *self, PyObject *arg) -/*[clinic end generated code: output=b691324745f39114 input=bc9c8ea0d901b328]*/ +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) +/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/ { - Py_ssize_t len; - - char typecode = self->ob_descr->typecode; - if (typecode != 'u') { + if (self->ob_descr->typecode != 'u') { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; } -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - len = PyUnicode_GetSize(arg); - if (len < 0) { - return NULL; - } -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ - len = PyUnicode_AsWideChar(arg, NULL, 0); - if (len < 0) { - return NULL; - } - assert(len > 0); - len--; -#endif /* USE_UNICODE_WCHAR_CACHE */ - - if (len > 0) { + Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0); + assert(ustr_length > 0); + if (ustr_length > 1) { + ustr_length--; /* trim trailing NUL character */ Py_ssize_t old_size = Py_SIZE(self); - if (array_resize(self, old_size + len) == -1) + if (array_resize(self, old_size + ustr_length) == -1) { return NULL; - PyUnicode_AsWideChar(arg, (wchar_t *)self->ob_item + old_size, len); + } + + // must not fail + PyUnicode_AsWideChar( + ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); } Py_RETURN_NONE; @@ -2697,58 +2673,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } else if (initial != NULL && PyUnicode_Check(initial)) { Py_ssize_t n; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - Py_UNICODE *ustr; - - ustr = PyUnicode_AsUnicode(initial); + wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n); if (ustr == NULL) { - PyErr_NoMemory(); Py_DECREF(a); return NULL; } - n = PyUnicode_GET_DATA_SIZE(initial); - if (n > 0) { - arrayobject *self = (arrayobject 
*)a; - char *item = self->ob_item; - item = (char *)PyMem_Realloc(item, n); - if (item == NULL) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; - } - self->ob_item = item; - Py_SET_SIZE(self, n / sizeof(Py_UNICODE)); - memcpy(item, ustr, n); - self->allocated = Py_SIZE(self); - } -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ - n = PyUnicode_AsWideChar(initial, NULL, 0); - if (n < 0) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; - } - assert(n > 0); - n--; if (n > 0) { arrayobject *self = (arrayobject *)a; - char *item = self->ob_item; - item = (char *)PyMem_Realloc(item, n * sizeof(wchar_t)); - if (item == NULL) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; - } - self->ob_item = item; + // self->ob_item may be NULL but it is safe. + PyMem_Free(self->ob_item); + self->ob_item = (char *)ustr; Py_SET_SIZE(self, n); - PyUnicode_AsWideChar(initial, (wchar_t*)item, n); - self->allocated = Py_SIZE(self); + self->allocated = n; } -#endif /* USE_UNICODE_WCHAR_CACHE */ } else if (initial != NULL && array_Check(initial) && len > 0) { arrayobject *self = (arrayobject *)a; diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h index 7a1dfaf3b8aeda..300cd1397101e8 100644 --- a/Modules/clinic/arraymodule.c.h +++ b/Modules/clinic/arraymodule.c.h @@ -352,7 +352,7 @@ array_array_tobytes(arrayobject *self, PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(array_array_fromunicode__doc__, -"fromunicode($self, arg, /)\n" +"fromunicode($self, ustr, /)\n" "--\n" "\n" "Extends this array with data from the unicode string ustr.\n" @@ -365,23 +365,23 @@ PyDoc_STRVAR(array_array_fromunicode__doc__, {"fromunicode", (PyCFunction)array_array_fromunicode, METH_O, array_array_fromunicode__doc__}, static PyObject * -array_array_fromunicode_impl(arrayobject *self, PyObject *arg); +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr); static PyObject * -array_array_fromunicode(arrayobject *self, PyObject *arg_) 
+array_array_fromunicode(arrayobject *self, PyObject *arg) { PyObject *return_value = NULL; - PyObject *arg; + PyObject *ustr; - if (!PyUnicode_Check(arg_)) { - _PyArg_BadArgument("fromunicode", "argument", "str", arg_); + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("fromunicode", "argument", "str", arg); goto exit; } - if (PyUnicode_READY(arg_) == -1) { + if (PyUnicode_READY(arg) == -1) { goto exit; } - arg = arg_; - return_value = array_array_fromunicode_impl(self, arg); + ustr = arg; + return_value = array_array_fromunicode_impl(self, ustr); exit: return return_value; @@ -514,4 +514,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__, #define ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF \ {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__}, -/*[clinic end generated code: output=c5fbfe7c7b4ff2a8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=91c1cded65a1285f input=a9049054013a1b77]*/ From 16ac7fd44112ba67b035d5b29aab9316ec63c6f4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 11:20:06 +0300 Subject: [PATCH 15/17] Temporarily disable the wchar_t cache by default. --- Include/cpython/unicodeobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 95ccef38086a8c..8f39b2d1f7c39c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -12,7 +12,7 @@ /* --- Internal Unicode Operations ---------------------------------------- */ #ifndef HAVE_UNICODE_WCHAR_CACHE -# define HAVE_UNICODE_WCHAR_CACHE 1 +# define HAVE_UNICODE_WCHAR_CACHE 0 #endif /* HAVE_UNICODE_WCHAR_CACHE */ #ifndef USE_UNICODE_WCHAR_CACHE From 5950b5caaa45fd8d484129d7da540b6e07211537 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 14:57:43 +0300 Subject: [PATCH 16/17] Remove HAVE_UNICODE_WCHAR_CACHE.
--- Include/cpython/unicodeobject.h | 14 +- Objects/unicodeobject.c | 244 +++++++++++--------------------- Python/getargs.c | 9 +- Python/traceback.c | 15 +- 4 files changed, 86 insertions(+), 196 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 8f39b2d1f7c39c..615b4a971d5f47 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -11,12 +11,8 @@ /* --- Internal Unicode Operations ---------------------------------------- */ -#ifndef HAVE_UNICODE_WCHAR_CACHE -# define HAVE_UNICODE_WCHAR_CACHE 0 -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - #ifndef USE_UNICODE_WCHAR_CACHE -# define USE_UNICODE_WCHAR_CACHE HAVE_UNICODE_WCHAR_CACHE +# define USE_UNICODE_WCHAR_CACHE 1 #endif /* USE_UNICODE_WCHAR_CACHE */ /* Since splitting on whitespace is an important use case, and @@ -219,9 +215,7 @@ typedef struct { 4 bytes (see issue #19537 on m68k). */ unsigned int :24; } state; -#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr; /* wchar_t representation (null-terminated) */ -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyASCIIObject; /* Non-ASCII strings allocated through PyUnicode_New use the @@ -232,10 +226,8 @@ typedef struct { Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the * terminating \0. */ char *utf8; /* UTF-8 representation (null-terminated) */ -#if HAVE_UNICODE_WCHAR_CACHE Py_ssize_t wstr_length; /* Number of code points in wstr, possible * surrogates count as two code points. */ -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyCompactUnicodeObject; /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the @@ -256,7 +248,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( int check_content); /* Fast access macros */ -#if HAVE_UNICODE_WCHAR_CACHE /* Returns the deprecated Py_UNICODE representation's size in code units (this includes surrogate pairs as 2 units). 
@@ -291,7 +282,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( #define PyUnicode_AS_DATA(op) \ ((const char *)(PyUnicode_AS_UNICODE(op))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ @@ -452,7 +442,6 @@ enum PyUnicode_Kind { (0xffffU) : \ (0x10ffffU))))) -#if HAVE_UNICODE_WCHAR_CACHE Py_DEPRECATED(3.3) static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) { return PyUnicode_IS_COMPACT_ASCII(op) ? @@ -460,7 +449,6 @@ static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) { ((PyCompactUnicodeObject*)op)->wstr_length; } #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* === Public API ========================================================= */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e5f17248695c92..b6b56c4f97fa9c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -117,7 +117,6 @@ extern "C" { PyUnicode_IS_COMPACT_ASCII(op) ? 
\ ((PyASCIIObject*)(op))->length : \ _PyUnicode_UTF8_LENGTH(op)) -#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_WSTR(op) \ (((PyASCIIObject*)(op))->wstr) @@ -129,7 +128,6 @@ extern "C" { ((PyCompactUnicodeObject*)op)->wstr_length) #define _PyUnicode_WSTR_LENGTH(op) \ (((PyCompactUnicodeObject*)(op))->wstr_length) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ #define _PyUnicode_LENGTH(op) \ (((PyASCIIObject *)(op))->length) #define _PyUnicode_STATE(op) \ @@ -156,11 +154,9 @@ extern "C" { (assert(_PyUnicode_CHECK(op)), \ assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) -#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_SHARE_WSTR(op) \ (assert(_PyUnicode_CHECK(op)), \ (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ @@ -169,14 +165,12 @@ extern "C" { && _PyUnicode_UTF8(op) \ && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) -#if HAVE_UNICODE_WCHAR_CACHE /* true if the Unicode object has an allocated wstr memory block (not shared with other data) */ #define _PyUnicode_HAS_WSTR_MEMORY(op) \ ((_PyUnicode_WSTR(op) && \ (!PyUnicode_IS_READY(op) || \ _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Generic helper macro to convert characters of different types. 
from_type and to_type have to be valid type names, begin and end @@ -539,7 +533,6 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) PyUnicodeObject *unicode = (PyUnicodeObject *)op; data = unicode->data.any; -#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { CHECK(ascii->length == 0); CHECK(ascii->hash == -1); @@ -551,9 +544,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) CHECK(data == NULL); CHECK(compact->utf8 == NULL); } - else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - { + else { CHECK(kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND); @@ -568,7 +559,6 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) CHECK(compact->utf8 != data); } } -#if HAVE_UNICODE_WCHAR_CACHE if (kind != PyUnicode_WCHAR_KIND) { if ( #if SIZEOF_WCHAR_T == 2 @@ -583,14 +573,11 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) } else CHECK(ascii->wstr != data); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (compact->utf8 == NULL) CHECK(compact->utf8_length == 0); -#if HAVE_UNICODE_WCHAR_CACHE if (ascii->wstr == NULL) CHECK(compact->wstr_length == 0); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /* check that the best kind is used: O(n) operation */ @@ -978,7 +965,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find_max_char.h" #include "stringlib/undef.h" -#if HAVE_UNICODE_WCHAR_CACHE _Py_COMP_DIAG_PUSH _Py_COMP_DIAG_IGNORE_DEPR_DECLS #include "stringlib/unicodedefs.h" @@ -987,8 +973,8 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS #include "stringlib/find.h" #include "stringlib/undef.h" _Py_COMP_DIAG_POP + #undef STRINGLIB_GET_EMPTY -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Unicode Object ----------------------------------------------------- */ @@ -1047,6 +1033,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) Py_ssize_t char_size; Py_ssize_t struct_size; Py_ssize_t new_size; + int share_wstr; PyObject *new_unicode; #ifdef Py_DEBUG Py_ssize_t old_length = 
_PyUnicode_LENGTH(unicode); @@ -1061,9 +1048,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) struct_size = sizeof(PyASCIIObject); else struct_size = sizeof(PyCompactUnicodeObject); -#if HAVE_UNICODE_WCHAR_CACHE - int share_wstr = _PyUnicode_SHARE_WSTR(unicode); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + share_wstr = _PyUnicode_SHARE_WSTR(unicode); if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { PyErr_NoMemory(); @@ -1093,7 +1078,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _Py_NewReference(unicode); _PyUnicode_LENGTH(unicode) = length; -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); if (!PyUnicode_IS_ASCII(unicode)) @@ -1105,7 +1089,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = 0; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif @@ -1118,13 +1101,14 @@ resize_compact(PyObject *unicode, Py_ssize_t length) static int resize_inplace(PyObject *unicode, Py_ssize_t length) { + wchar_t *wstr; Py_ssize_t new_size; assert(!PyUnicode_IS_COMPACT(unicode)); assert(Py_REFCNT(unicode) == 1); if (PyUnicode_IS_READY(unicode)) { Py_ssize_t char_size; - int share_utf8; + int share_wstr, share_utf8; void *data; #ifdef Py_DEBUG Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); @@ -1132,9 +1116,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) data = _PyUnicode_DATA_ANY(unicode); char_size = PyUnicode_KIND(unicode); -#if HAVE_UNICODE_WCHAR_CACHE - int share_wstr = _PyUnicode_SHARE_WSTR(unicode); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + share_wstr = _PyUnicode_SHARE_WSTR(unicode); share_utf8 = _PyUnicode_SHARE_UTF8(unicode); if (length > (PY_SSIZE_T_MAX / char_size - 1)) { @@ -1156,12 +1138,10 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } _PyUnicode_DATA_ANY(unicode) = data; -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { 
_PyUnicode_WSTR(unicode) = data; _PyUnicode_WSTR_LENGTH(unicode) = length; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (share_utf8) { _PyUnicode_UTF8(unicode) = data; _PyUnicode_UTF8_LENGTH(unicode) = length; @@ -1171,16 +1151,11 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { -#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; -#if HAVE_UNICODE_WCHAR_CACHE } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } -#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); /* check for integer overflow */ @@ -1189,7 +1164,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } new_size = sizeof(wchar_t) * (length + 1); - wchar_t *wstr = _PyUnicode_WSTR(unicode); + wstr = _PyUnicode_WSTR(unicode); wstr = PyObject_REALLOC(wstr, new_size); if (!wstr) { PyErr_NoMemory(); @@ -1198,7 +1173,6 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) _PyUnicode_WSTR(unicode) = wstr; _PyUnicode_WSTR(unicode)[length] = 0; _PyUnicode_WSTR_LENGTH(unicode) = length; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; } @@ -1207,8 +1181,20 @@ static PyObject* resize_copy(PyObject *unicode, Py_ssize_t length) { Py_ssize_t copy_length; -#if HAVE_UNICODE_WCHAR_CACHE - if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) { + if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) { + PyObject *copy; + + assert(PyUnicode_IS_READY(unicode)); + + copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); + if (copy == NULL) + return NULL; + + copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); + return copy; + } + else { PyObject *w; w = (PyObject*)_PyUnicode_New(length); @@ -1220,18 +1206,6 @@ resize_copy(PyObject *unicode, Py_ssize_t length) 
copy_length * sizeof(wchar_t)); return w; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - PyObject *copy; - - assert(PyUnicode_IS_READY(unicode)); - - copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); - if (copy == NULL) - return NULL; - - copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); - return copy; } /* We allocate one more byte to make sure the string is @@ -1246,12 +1220,14 @@ resize_copy(PyObject *unicode, Py_ssize_t length) static PyUnicodeObject * _PyUnicode_New(Py_ssize_t length) { + PyUnicodeObject *unicode; + size_t new_size; + /* Optimization for empty strings */ if (length == 0) { return (PyUnicodeObject *)unicode_new_empty(); } -#if HAVE_UNICODE_WCHAR_CACHE /* Ensure we won't overflow the size. */ if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { return (PyUnicodeObject *)PyErr_NoMemory(); @@ -1262,10 +1238,10 @@ _PyUnicode_New(Py_ssize_t length) return NULL; } - PyUnicodeObject *unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); + unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); if (unicode == NULL) return NULL; - size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); _PyUnicode_WSTR_LENGTH(unicode) = length; _PyUnicode_HASH(unicode) = -1; @@ -1298,11 +1274,6 @@ _PyUnicode_New(Py_ssize_t length) assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); return unicode; -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_SetString(PyExc_SystemError, - "_PyUnicode_New() with non-zero size is not supported"); - return NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } static const char* @@ -1384,23 +1355,18 @@ _PyUnicode_Dump(PyObject *op) } else data = unicode->data.any; - printf("%s: len=%zu", unicode_kind_name(op), ascii->length); + printf("%s: len=%zu, ", unicode_kind_name(op), ascii->length); -#if HAVE_UNICODE_WCHAR_CACHE - printf(", "); if (ascii->wstr == data) 
printf("shared "); - printf("wstr=%p", ascii->wstr); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + printf("wstr=%p", (void *)ascii->wstr); if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) { -#if HAVE_UNICODE_WCHAR_CACHE - printf(" (%zu)", compact->wstr_length); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - printf(", "); - if (!ascii->state.compact && compact->utf8 == unicode->data.any) + printf(" (%zu), ", compact->wstr_length); + if (!ascii->state.compact && compact->utf8 == unicode->data.any) { printf("shared "); - printf("utf8=%p (%zu)", compact->utf8, compact->utf8_length); + } + printf("utf8=%p (%zu)", (void *)compact->utf8, compact->utf8_length); } printf(", data=%p\n", data); } @@ -1438,22 +1404,28 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) PyCompactUnicodeObject *unicode; void *data; enum PyUnicode_Kind kind; - int is_ascii; + int is_sharing, is_ascii; Py_ssize_t char_size; Py_ssize_t struct_size; is_ascii = 0; + is_sharing = 0; struct_size = sizeof(PyCompactUnicodeObject); if (maxchar < 128) { kind = PyUnicode_1BYTE_KIND; + char_size = 1; is_ascii = 1; struct_size = sizeof(PyASCIIObject); } else if (maxchar < 256) { kind = PyUnicode_1BYTE_KIND; + char_size = 1; } else if (maxchar < 65536) { kind = PyUnicode_2BYTE_KIND; + char_size = 2; + if (sizeof(wchar_t) == 2) + is_sharing = 1; } else { if (maxchar > MAX_UNICODE) { @@ -1462,11 +1434,10 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) return NULL; } kind = PyUnicode_4BYTE_KIND; + char_size = 4; + if (sizeof(wchar_t) == 4) + is_sharing = 1; } - char_size = kind; -#if HAVE_UNICODE_WCHAR_CACHE - int is_sharing = (sizeof(wchar_t) == kind); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the size. 
*/ if (size < 0) { @@ -1501,35 +1472,29 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) _PyUnicode_STATE(unicode).ascii = is_ascii; if (is_ascii) { ((char*)data)[size] = 0; -#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR(unicode) = NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + } + else if (kind == PyUnicode_1BYTE_KIND) { + ((char*)data)[size] = 0; + _PyUnicode_WSTR(unicode) = NULL; + _PyUnicode_WSTR_LENGTH(unicode) = 0; + unicode->utf8 = NULL; + unicode->utf8_length = 0; } else { unicode->utf8 = NULL; unicode->utf8_length = 0; - if (kind == PyUnicode_1BYTE_KIND) { - ((char*)data)[size] = 0; -#if HAVE_UNICODE_WCHAR_CACHE - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + if (kind == PyUnicode_2BYTE_KIND) + ((Py_UCS2*)data)[size] = 0; + else /* kind == PyUnicode_4BYTE_KIND */ + ((Py_UCS4*)data)[size] = 0; + if (is_sharing) { + _PyUnicode_WSTR_LENGTH(unicode) = size; + _PyUnicode_WSTR(unicode) = (wchar_t *)data; } else { - if (kind == PyUnicode_2BYTE_KIND) - ((Py_UCS2*)data)[size] = 0; - else /* kind == PyUnicode_4BYTE_KIND */ - ((Py_UCS4*)data)[size] = 0; -#if HAVE_UNICODE_WCHAR_CACHE - if (is_sharing) { - _PyUnicode_WSTR_LENGTH(unicode) = size; - _PyUnicode_WSTR(unicode) = (wchar_t *)data; - } - else { - _PyUnicode_WSTR_LENGTH(unicode) = 0; - _PyUnicode_WSTR(unicode) = NULL; - } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + _PyUnicode_WSTR_LENGTH(unicode) = 0; + _PyUnicode_WSTR(unicode) = NULL; } } #ifdef Py_DEBUG @@ -1844,14 +1809,12 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, int _PyUnicode_Ready(PyObject *unicode) { -#if HAVE_UNICODE_WCHAR_CACHE wchar_t *end; Py_UCS4 maxchar = 0; Py_ssize_t num_surrogates; #if SIZEOF_WCHAR_T == 2 Py_ssize_t length_wo_surrogates; #endif -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* _PyUnicode_Ready() is only intended for old-style API usage where strings were created using _PyObject_New() and where no canonical @@ -1859,15 +1822,12 @@ 
_PyUnicode_Ready(PyObject *unicode) which are not yet ready. */ assert(_PyUnicode_CHECK(unicode)); assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND); -#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_DATA_ANY(unicode) == NULL); assert(_PyUnicode_UTF8(unicode) == NULL); /* Actually, it should neither be interned nor be anything else: */ assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED); -#if HAVE_UNICODE_WCHAR_CACHE end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, &maxchar, &num_surrogates) == -1) @@ -1970,7 +1930,6 @@ _PyUnicode_Ready(PyObject *unicode) #endif PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0'; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_STATE(unicode).ready = 1; assert(_PyUnicode_CheckConsistency(unicode, 1)); return 0; @@ -2009,11 +1968,9 @@ unicode_dealloc(PyObject *unicode) Py_UNREACHABLE(); } -#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { PyObject_DEL(_PyUnicode_WSTR(unicode)); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyObject_DEL(_PyUnicode_UTF8(unicode)); } @@ -2076,11 +2033,9 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) assert(PyUnicode_Check(unicode)); assert(0 <= length); -#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) old_length = PyUnicode_WSTR_LENGTH(unicode); else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ old_length = PyUnicode_GET_LENGTH(unicode); if (old_length == length) return 0; @@ -4225,7 +4180,6 @@ PyUnicode_AsUTF8(PyObject *unicode) Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) { -#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); return NULL; @@ -4255,10 +4209,6 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) if (size != NULL) *size = 
PyUnicode_WSTR_LENGTH(unicode); return w; -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_SetString(PyExc_SystemError, "PyUnicode_AsUnicodeAndSize is not supported"); - return NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /* Deprecated APIs */ @@ -4290,7 +4240,6 @@ _PyUnicode_AsUnicode(PyObject *unicode) Py_ssize_t PyUnicode_GetSize(PyObject *unicode) { -#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); goto onError; @@ -4303,10 +4252,6 @@ PyUnicode_GetSize(PyObject *unicode) onError: return -1; -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_SetString(PyExc_SystemError, "PyUnicode_GetSize is not supported"); - return -1; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } _Py_COMP_DIAG_POP @@ -11414,11 +11359,10 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) Py_ssize_t i; int kind; Py_UCS4 chr; + const unsigned char *ustr = (const unsigned char *)str; assert(_PyUnicode_CHECK(uni)); -#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_IS_READY(uni)) { - const unsigned char *ustr = (const unsigned char *)str; const wchar_t *ws = _PyUnicode_WSTR(uni); /* Compare Unicode string and source character set string */ for (i = 0; (chr = ws[i]) && ustr[i]; i++) { @@ -11433,7 +11377,6 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) return -1; /* str is longer */ return 0; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ kind = PyUnicode_KIND(uni); if (kind == PyUnicode_1BYTE_KIND) { const void *data = PyUnicode_1BYTE_DATA(uni); @@ -11471,7 +11414,6 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) } } -#if HAVE_UNICODE_WCHAR_CACHE static int non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) { @@ -11489,7 +11431,6 @@ non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) } return 1; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ int _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) @@ -11502,13 +11443,11 @@ _PyUnicode_EqualToASCIIString(PyObject *unicode, 
const char *str) assert((unsigned char)*p < 128); } #endif -#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(unicode) == -1) { /* Memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(unicode, str); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(unicode)) return 0; len = (size_t)PyUnicode_GET_LENGTH(unicode); @@ -11529,13 +11468,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) } #endif -#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(left) == -1) { /* memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(left, right->string); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(left)) return 0; @@ -11813,7 +11750,7 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) } /* -Wraps asciilib_parse_args_finds() and additionally ensures that the +Wraps stringlib_parse_args_finds() and additionally ensures that the first argument is a unicode object. */ @@ -11822,7 +11759,7 @@ parse_args_finds_unicode(const char * function_name, PyObject *args, PyObject **substring, Py_ssize_t *start, Py_ssize_t *end) { - if(asciilib_parse_args_finds(function_name, args, substring, + if(stringlib_parse_args_finds(function_name, args, substring, start, end)) { if (ensure_unicode(*substring) < 0) return 0; @@ -12573,14 +12510,11 @@ _PyUnicode_ScanIdentifier(PyObject *self) int PyUnicode_IsIdentifier(PyObject *self) { -#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_IS_READY(self)) { -#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_ssize_t i = _PyUnicode_ScanIdentifier(self); Py_ssize_t len = PyUnicode_GET_LENGTH(self); /* an empty string is not a valid identifier */ return len && i == len; -#if HAVE_UNICODE_WCHAR_CACHE } else { _Py_COMP_DIAG_PUSH @@ -12624,7 +12558,6 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS return 1; _Py_COMP_DIAG_POP } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /*[clinic input] @@ -13902,7 +13835,7 @@ unicode_startswith(PyObject *self, Py_ssize_t end = 
PY_SSIZE_T_MAX; int result; - if (!asciilib_parse_args_finds("startswith", args, &subobj, &start, &end)) + if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13956,7 +13889,7 @@ unicode_endswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!asciilib_parse_args_finds("endswith", args, &subobj, &start, &end)) + if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -14400,12 +14333,10 @@ unicode_sizeof_impl(PyObject *self) size += (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self); } -#if HAVE_UNICODE_WCHAR_CACHE /* If the wstr pointer is present, account for it unless it is shared with the data pointer. Check if the data is not shared. */ if (_PyUnicode_HAS_WSTR_MEMORY(self)) size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(self)) size += PyUnicode_UTF8_LENGTH(self) + 1; @@ -15562,8 +15493,8 @@ static PyObject * unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *unicode, *self; - Py_ssize_t length; - int share_utf8; + Py_ssize_t length, char_size; + int share_wstr, share_utf8; unsigned int kind; void *data; @@ -15597,35 +15528,37 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; _PyUnicode_STATE(self).ready = 1; + _PyUnicode_WSTR(self) = NULL; _PyUnicode_UTF8_LENGTH(self) = 0; _PyUnicode_UTF8(self) = NULL; -#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR_LENGTH(self) = 0; - _PyUnicode_WSTR(self) = NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_DATA_ANY(self) = NULL; share_utf8 = 0; -#if HAVE_UNICODE_WCHAR_CACHE - int share_wstr = 0; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + share_wstr = 0; if (kind == PyUnicode_1BYTE_KIND) { + char_size = 
1; if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) share_utf8 = 1; } -#if HAVE_UNICODE_WCHAR_CACHE + else if (kind == PyUnicode_2BYTE_KIND) { + char_size = 2; + if (sizeof(wchar_t) == 2) + share_wstr = 1; + } else { - if (sizeof(wchar_t) == kind) + assert(kind == PyUnicode_4BYTE_KIND); + char_size = 4; + if (sizeof(wchar_t) == 4) share_wstr = 1; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the length. */ - if (length > (PY_SSIZE_T_MAX / kind - 1)) { + if (length > (PY_SSIZE_T_MAX / char_size - 1)) { PyErr_NoMemory(); goto onError; } - data = PyObject_MALLOC((length + 1) * kind); + data = PyObject_MALLOC((length + 1) * char_size); if (data == NULL) { PyErr_NoMemory(); goto onError; @@ -15636,12 +15569,10 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_UTF8_LENGTH(self) = length; _PyUnicode_UTF8(self) = data; } -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR_LENGTH(self) = length; _PyUnicode_WSTR(self) = (wchar_t *)data; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1)); @@ -15980,7 +15911,7 @@ unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), it->it_seq, it->it_index); } else { - PyObject *u = PyUnicode_New(0, 0); + PyObject *u = (PyObject *)_PyUnicode_New(0); if (u == NULL) return NULL; return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u); @@ -16071,21 +16002,6 @@ unicode_iter(PyObject *seq) return (PyObject *)it; } -Py_UNICODE* -PyUnicode_AsUnicodeCopy(PyObject *unicode) -{ - Py_ssize_t len; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - if (PyUnicode_AsUnicode(unicode) == NULL) - return NULL; -_Py_COMP_DIAG_POP -#endif /* USE_UNICODE_WCHAR_CACHE */ - return PyUnicode_AsWideCharString(unicode, &len); -} - - static int encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) { diff --git 
a/Python/getargs.c b/Python/getargs.c index 534e209406e4dc..c85ff6d4777d2c 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1014,7 +1014,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'u': /* raw unicode buffer (Py_UNICODE *) */ case 'Z': /* raw unicode buffer or None */ { -#if HAVE_UNICODE_WCHAR_CACHE // TODO: Raise DeprecationWarning _Py_COMP_DIAG_PUSH _Py_COMP_DIAG_IGNORE_DEPR_DECLS @@ -1057,14 +1056,8 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS return converterr(c == 'Z' ? "str or None" : "str", arg, msgbuf, bufsize); } -_Py_COMP_DIAG_POP -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_Format(PyExc_SystemError, - "unsupported PyArg_Parse format: \"%s\"", - format-1); - RETURN_ERR_OCCURRED; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ break; +_Py_COMP_DIAG_POP } case 'e': {/* encoded string */ diff --git a/Python/traceback.c b/Python/traceback.c index e183782400556e..99b63af11f8bee 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -684,9 +684,7 @@ _Py_DumpASCII(int fd, PyObject *text) int truncated; int kind; void *data = NULL; -#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr = NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_UCS4 ch; if (!PyUnicode_Check(text)) @@ -694,16 +692,13 @@ _Py_DumpASCII(int fd, PyObject *text) size = ascii->length; kind = ascii->state.kind; -#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { wstr = ((PyASCIIObject *)text)->wstr; if (wstr == NULL) return; size = ((PyCompactUnicodeObject *)text)->wstr_length; } - else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - if (ascii->state.compact) { + else if (ascii->state.compact) { if (ascii->state.ascii) data = ((PyASCIIObject*)text) + 1; else @@ -724,12 +719,10 @@ _Py_DumpASCII(int fd, PyObject *text) } for (i=0; i < size; i++) { -#if HAVE_UNICODE_WCHAR_CACHE - if (kind == PyUnicode_WCHAR_KIND) - ch = wstr[i]; - else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + if (kind != PyUnicode_WCHAR_KIND) ch = PyUnicode_READ(kind, data, i); + else + ch = 
wstr[i]; if (' ' <= ch && ch <= 126) { /* printable ASCII character */ char c = (char)ch; From 672b8d71dd40a3a2e3f6db1e7ba7a020b41c66fe Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 21:13:41 +0300 Subject: [PATCH 17/17] Fix possible leaks. --- PC/winreg.c | 3 +++ Python/fileutils.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/PC/winreg.c b/PC/winreg.c index 56e8c3a0aa5f74..a24d784c773c02 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -1731,6 +1731,9 @@ _Py_COMP_DIAG_POP if (PySys_Audit("winreg.SetValue", "nunu#", (Py_ssize_t)key, sub_key, (Py_ssize_t)type, value, value_length) < 0) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(value); +#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } diff --git a/Python/fileutils.c b/Python/fileutils.c index 9b60f6eca412e2..50ef3c174acc84 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1465,6 +1465,9 @@ _Py_fopen_obj(PyObject *path, const char *mode) wmode, Py_ARRAY_LENGTH(wmode)); if (usize == 0) { PyErr_SetFromWindowsErr(0); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy