From 389dfd9113d1864eb8f819e5cc47c103b5af103a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 12 Apr 2024 18:36:39 +0300 Subject: [PATCH 1/9] gh-112068: C API: Add support of nullable arguments in PyArg_Parse --- Lib/test/test_capi/test_getargs.py | 53 +++++++ Modules/_ctypes/_ctypes.c | 11 +- Modules/_json.c | 13 +- Modules/_threadmodule.c | 14 +- Modules/mmapmodule.c | 3 +- Python/getargs.c | 216 ++++++++++++++++++----------- 6 files changed, 199 insertions(+), 111 deletions(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 232aa2a80025dc..cc356cc6132a91 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1352,6 +1352,59 @@ def test_nested_tuple(self): "argument 1 must be sequence of length 1, not 0"): parse(((),), {}, '(' + f + ')', ['a']) + def test_nullable(self): + parse = _testcapi.parse_tuple_and_keywords + + def check(format, arg, allows_none=False): + # Because some format units (such as y*) require cleanup, + # we force the parsing code to perform the cleanup by adding + # an argument that always fails. + # By checking for an exception, we ensure that the parsing + # of the first argument was successful. + self.assertRaises(OverflowError, parse, + (arg, 256), {}, format + '?b', ['a', 'b']) + self.assertRaises(OverflowError, parse, + (None, 256), {}, format + '?b', ['a', 'b']) + self.assertRaises(OverflowError, parse, + (arg, 256), {}, format + 'b', ['a', 'b']) + self.assertRaises(OverflowError if allows_none else TypeError, parse, + (None, 256), {}, format + 'b', ['a', 'b']) + + check('b', 42) + check('B', 42) + check('h', 42) + check('H', 42) + check('i', 42) + check('I', 42) + check('n', 42) + check('l', 42) + check('k', 42) + check('L', 42) + check('K', 42) + check('f', 2.5) + check('d', 2.5) + check('D', 2.5j) + check('c', b'a') + check('C', 'a') + check('p', True, allows_none=True) + check('y', b'buffer') + check('y*', b'buffer') + check('y#', b'buffer') + check('s', 'string') + check('s*', 'string') + check('s#', 'string') + check('z', 'string', allows_none=True) + check('z*', 'string', allows_none=True) + check('z#', 'string', allows_none=True) + check('w*', bytearray(b'buffer')) + check('U', 'string') + check('S', b'bytes') + check('Y', bytearray(b'bytearray')) + check('O', object, allows_none=True) + + # TODO: Not tested: es?, es#?, et?, et#?, O!, O& + # TODO: Not implemented: (...)? + @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') def test_gh_119213(self): rc, out, err = script_helper.assert_python_ok("-c", """if True: diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 1ff108a39320cf..c21715fda7c601 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3620,8 +3620,7 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) || - !(name == Py_None || PyUnicode_Check(name))) + if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); @@ -3686,10 +3685,8 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) void *handle; PyObject *paramflags = NULL; - if (!PyArg_ParseTuple(args, "O|O", &ftuple, ¶mflags)) + if (!PyArg_ParseTuple(args, "O|O?", &ftuple, ¶mflags)) return NULL; - if (paramflags == Py_None) - paramflags = NULL; ftuple = PySequence_Tuple(ftuple); if (!ftuple) @@ -3804,10 +3801,8 @@ PyCFuncPtr_FromVtblIndex(PyTypeObject *type, PyObject *args, PyObject *kwds) GUID *iid = NULL; Py_ssize_t iid_len = 0; - if (!PyArg_ParseTuple(args, "is|Oz#", &index, &name, ¶mflags, &iid, &iid_len)) + if (!PyArg_ParseTuple(args, "is|O?z#", &index, &name, ¶mflags, &iid, &iid_len)) return NULL; - if (paramflags == Py_None) - paramflags = NULL; ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); if (!_validate_paramflags(st, type, paramflags)) { diff --git a/Modules/_json.c b/Modules/_json.c index c7fe1561bb1018..4fb18d3882ddf1 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1208,23 +1208,16 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; PyEncoderObject *s; - PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; + PyObject *markers = Py_None, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator; int sort_keys, skipkeys, allow_nan; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist, - &markers, &defaultfn, &encoder, &indent, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!?OOOUUppp:make_encoder", kwlist, + &PyDict_Type, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan)) return NULL; - if (markers != Py_None && !PyDict_Check(markers)) { - PyErr_Format(PyExc_TypeError, - "make_encoder() argument 1 must be dict or None, " - "not %.200s", Py_TYPE(markers)->tp_name); - return NULL; - } - s = (PyEncoderObject *)type->tp_alloc(type, 0); if (s == NULL) return NULL; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 39d309729d88b8..0834f46a847d8e 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -1878,10 +1878,10 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, PyObject *func = NULL; int daemon = 1; thread_module_state *state = get_thread_state(module); - PyObject *hobj = NULL; + PyObject *hobj = Py_None; if (!PyArg_ParseTupleAndKeywords(fargs, fkwargs, - "O|Op:start_joinable_thread", keywords, - &func, &hobj, &daemon)) { + "O|O!?p:start_joinable_thread", keywords, + &func, state->thread_handle_type, &hobj, &daemon)) { return NULL; } @@ -1891,14 +1891,6 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, return NULL; } - if (hobj == NULL) { - hobj = Py_None; - } - else if (hobj != Py_None && !Py_IS_TYPE(hobj, state->thread_handle_type)) { - PyErr_SetString(PyExc_TypeError, "'handle' must be a _ThreadHandle"); - return NULL; - } - if (PySys_Audit("_thread.start_joinable_thread", "OiO", func, daemon, hobj) < 0) { return NULL; diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 99a85e9e49ad47..9e0df26bac1c7c 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -23,7 +23,6 @@ #endif #include -#include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t() #include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_fileutils.h" // _Py_stat_struct @@ -512,7 +511,7 @@ mmap_read_method(mmap_object *self, Py_ssize_t num_bytes = PY_SSIZE_T_MAX, remaining; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "|O&:read", _Py_convert_optional_to_ssize_t, &num_bytes)) + if (!PyArg_ParseTuple(args, "|n?:read", &num_bytes)) return NULL; CHECK_VALID(NULL); diff --git a/Python/getargs.c b/Python/getargs.c index b96ce3a22dae7c..b2aad4696b6c52 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -21,6 +21,7 @@ PyAPI_FUNC(int) _PyArg_VaParseTupleAndKeywords_SizeT(PyObject *, PyObject *, const char *, const char * const *, va_list); #define FLAG_COMPAT 1 +#define FLAG_NULLABLE 4 typedef int (*destr_t)(PyObject *, void *); @@ -487,8 +488,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (!PySequence_Check(arg) || PyBytes_Check(arg)) { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be %d-item sequence, not %.50s", + "must be %d-item sequence%s, not %.50s", n, + (flags & FLAG_NULLABLE) ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } @@ -502,6 +504,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return msgbuf; } + flags &= ~FLAG_NULLABLE; format = *p_format; for (i = 0; i < n; i++) { const char *msg; @@ -573,7 +576,7 @@ _PyArg_BadArgument(const char *fname, const char *displayname, } static const char * -converterr(const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) +converterr(int flags, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) { assert(expected != NULL); assert(arg != NULL); @@ -583,20 +586,25 @@ converterr(const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) } else { PyOS_snprintf(msgbuf, bufsize, - "must be %.50s, not %.50s", expected, + "must be %.50s%s, not %.50s", expected, + ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) + ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); } return msgbuf; } static const char * -convertcharerr(const char *expected, const char *what, Py_ssize_t size, +convertcharerr(int flags, const char *expected, const char *what, Py_ssize_t size, char *msgbuf, size_t bufsize) { assert(expected != NULL); PyOS_snprintf(msgbuf, bufsize, - "must be %.50s, not %.50s of length %zd", - expected, what, size); + "must be %.50s%s, not %.50s of length %zd", + expected, + ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) + ? " or None" : "", + what, size); return msgbuf; } @@ -616,6 +624,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, char *msgbuf, size_t bufsize, freelist_t *freelist) { #define RETURN_ERR_OCCURRED return msgbuf +#define HANDLE_NULLABLE \ + if (*format == '?') { \ + format++; \ + if (arg == Py_None) { \ + break; \ + } \ + flags |= FLAG_NULLABLE; \ + } + const char *format = *p_format; char c = *format++; @@ -625,6 +642,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'b': { /* unsigned byte -- very short int */ unsigned char *p = va_arg(*p_va, unsigned char *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -638,7 +656,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, "unsigned byte integer is greater than maximum"); RETURN_ERR_OCCURRED; } - else *p = (unsigned char) ival; break; } @@ -646,6 +663,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'B': {/* byte sized bitfield - both signed and unsigned values allowed */ unsigned char *p = va_arg(*p_va, unsigned char *); + HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -656,6 +674,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'h': {/* signed short int */ short *p = va_arg(*p_va, short *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -677,6 +696,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'H': { /* short int sized bitfield, both signed and unsigned allowed */ unsigned short *p = va_arg(*p_va, unsigned short *); + HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -687,6 +707,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'i': {/* signed int */ int *p = va_arg(*p_va, int *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -708,6 +729,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'I': { /* int sized bitfield, both signed and unsigned allowed */ unsigned int *p = va_arg(*p_va, unsigned int *); + HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -720,6 +742,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, { PyObject *iobj; Py_ssize_t *p = va_arg(*p_va, Py_ssize_t *); + HANDLE_NULLABLE; Py_ssize_t ival = -1; iobj = _PyNumber_Index(arg); if (iobj != NULL) { @@ -733,6 +756,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } case 'l': {/* long int */ long *p = va_arg(*p_va, long *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -743,17 +767,19 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'k': { /* long sized bitfield */ unsigned long *p = va_arg(*p_va, unsigned long *); + HANDLE_NULLABLE; unsigned long ival; if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongMask(arg); else - return converterr("int", arg, msgbuf, bufsize); + return converterr(flags, "int", arg, msgbuf, bufsize); *p = ival; break; } case 'L': {/* long long */ long long *p = va_arg( *p_va, long long * ); + HANDLE_NULLABLE; long long ival = PyLong_AsLongLong(arg); if (ival == (long long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -764,17 +790,19 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'K': { /* long long sized bitfield */ unsigned long long *p = va_arg(*p_va, unsigned long long *); + HANDLE_NULLABLE; unsigned long long ival; if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongLongMask(arg); else - return converterr("int", arg, msgbuf, bufsize); + return converterr(flags, "int", arg, msgbuf, bufsize); *p = ival; break; } case 'f': {/* float */ float *p = va_arg(*p_va, float *); + HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -785,6 +813,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'd': {/* double */ double *p = va_arg(*p_va, double *); + HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -795,6 +824,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'D': {/* complex double */ Py_complex *p = va_arg(*p_va, Py_complex *); + HANDLE_NULLABLE; Py_complex cval; cval = PyComplex_AsCComplex(arg); if (PyErr_Occurred()) @@ -806,9 +836,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'c': {/* char */ char *p = va_arg(*p_va, char *); + HANDLE_NULLABLE; if (PyBytes_Check(arg)) { if (PyBytes_GET_SIZE(arg) != 1) { - return convertcharerr("a byte string of length 1", + return convertcharerr(flags, "a byte string of length 1", "a bytes object", PyBytes_GET_SIZE(arg), msgbuf, bufsize); } @@ -816,27 +847,28 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (PyByteArray_Check(arg)) { if (PyByteArray_GET_SIZE(arg) != 1) { - return convertcharerr("a byte string of length 1", + return convertcharerr(flags, "a byte string of length 1", "a bytearray object", PyByteArray_GET_SIZE(arg), msgbuf, bufsize); } *p = PyByteArray_AS_STRING(arg)[0]; } else - return converterr("a byte string of length 1", arg, msgbuf, bufsize); + return converterr(flags, "a byte string of length 1", arg, msgbuf, bufsize); break; } case 'C': {/* unicode char */ int *p = va_arg(*p_va, int *); + HANDLE_NULLABLE; int kind; const void *data; if (!PyUnicode_Check(arg)) - return converterr("a unicode character", arg, msgbuf, bufsize); + return converterr(flags, "a unicode character", arg, msgbuf, bufsize); if (PyUnicode_GET_LENGTH(arg) != 1) { - return convertcharerr("a unicode character", + return convertcharerr(flags, "a unicode character", "a string", PyUnicode_GET_LENGTH(arg), msgbuf, bufsize); } @@ -849,6 +881,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'p': {/* boolean *p*redicate */ int *p = va_arg(*p_va, int *); + HANDLE_NULLABLE; int val = PyObject_IsTrue(arg); if (val > 0) *p = 1; @@ -867,24 +900,31 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count; if (*format == '*') { - if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) - return converterr(buf, arg, msgbuf, bufsize); format++; + HANDLE_NULLABLE; + if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) + return converterr(flags, buf, arg, msgbuf, bufsize); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } break; } - count = convertbuffer(arg, (const void **)p, &buf); - if (count < 0) - return converterr(buf, arg, msgbuf, bufsize); - if (*format == '#') { + else if (*format == '#') { Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - *psize = count; format++; - } else { + HANDLE_NULLABLE; + count = convertbuffer(arg, (const void **)p, &buf); + if (count < 0) + return converterr(flags, buf, arg, msgbuf, bufsize); + *psize = count; + } + else { + HANDLE_NULLABLE; + count = convertbuffer(arg, (const void **)p, &buf); + if (count < 0) + return converterr(flags, buf, arg, msgbuf, bufsize); if (strlen(*p) != (size_t)count) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); RETURN_ERR_OCCURRED; @@ -900,32 +940,35 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, /* "s*" or "z*" */ Py_buffer *p = (Py_buffer *)va_arg(*p_va, Py_buffer *); + format++; + HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0); else if (PyUnicode_Check(arg)) { Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(CONV_UNICODE, + return converterr(flags, CONV_UNICODE, arg, msgbuf, bufsize); PyBuffer_FillInfo(p, arg, (void *)sarg, len, 1, 0); } else { /* any bytes-like object */ const char *buf; if (getbuffer(arg, p, &buf) < 0) - return converterr(buf, arg, msgbuf, bufsize); + return converterr(flags, buf, arg, msgbuf, bufsize); } if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } - format++; } else if (*format == '#') { /* a string or read-only bytes-like object */ /* "s#" or "z#" */ const void **p = (const void **)va_arg(*p_va, const char **); Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); + format++; + HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) { *p = NULL; *psize = 0; @@ -934,7 +977,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(CONV_UNICODE, + return converterr(flags, CONV_UNICODE, arg, msgbuf, bufsize); *p = sarg; *psize = len; @@ -944,22 +987,22 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); if (count < 0) - return converterr(buf, arg, msgbuf, bufsize); + return converterr(flags, buf, arg, msgbuf, bufsize); *psize = count; } - format++; } else { /* "s" or "z" */ const char **p = va_arg(*p_va, const char **); Py_ssize_t len; sarg = NULL; + HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(CONV_UNICODE, + return converterr(flags, CONV_UNICODE, arg, msgbuf, bufsize); if (strlen(sarg) != (size_t)len) { PyErr_SetString(PyExc_ValueError, "embedded null character"); @@ -968,7 +1011,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = sarg; } else - return converterr(c == 'z' ? "str or None" : "str", + return converterr(flags, c == 'z' ? "str or None" : "str", arg, msgbuf, bufsize); } break; @@ -997,13 +1040,46 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, recode_strings = 0; else return converterr( - "(unknown parser marker combination)", + flags, "(unknown parser marker combination)", arg, msgbuf, bufsize); buffer = (char **)va_arg(*p_va, char **); format++; if (buffer == NULL) - return converterr("(buffer is NULL)", + return converterr(flags, "(buffer is NULL)", arg, msgbuf, bufsize); + Py_ssize_t *psize = NULL; + if (*format == '#') { + /* Using buffer length parameter '#': + + - if *buffer is NULL, a new buffer of the + needed size is allocated and the data + copied into it; *buffer is updated to point + to the new buffer; the caller is + responsible for PyMem_Free()ing it after + usage + + - if *buffer is not NULL, the data is + copied to *buffer; *buffer_len has to be + set to the size of the buffer on input; + buffer overflow is signalled with an error; + buffer has to provide enough room for the + encoded string plus the trailing 0-byte + + - in both cases, *buffer_len is updated to + the size of the buffer /excluding/ the + trailing 0-byte + + */ + psize = va_arg(*p_va, Py_ssize_t*); + + format++; + if (psize == NULL) { + return converterr( + flags, "(buffer_len is NULL)", + arg, msgbuf, bufsize); + } + } + HANDLE_NULLABLE; /* Encode object */ if (!recode_strings && @@ -1024,7 +1100,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, encoding, NULL); if (s == NULL) - return converterr("(encoding failed)", + return converterr(flags, "(encoding failed)", arg, msgbuf, bufsize); assert(PyBytes_Check(s)); size = PyBytes_GET_SIZE(s); @@ -1034,42 +1110,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - recode_strings ? "str" : "str, bytes or bytearray", + flags, recode_strings ? "str" : "str, bytes or bytearray", arg, msgbuf, bufsize); } /* Write output; output is guaranteed to be 0-terminated */ - if (*format == '#') { - /* Using buffer length parameter '#': - - - if *buffer is NULL, a new buffer of the - needed size is allocated and the data - copied into it; *buffer is updated to point - to the new buffer; the caller is - responsible for PyMem_Free()ing it after - usage - - - if *buffer is not NULL, the data is - copied to *buffer; *buffer_len has to be - set to the size of the buffer on input; - buffer overflow is signalled with an error; - buffer has to provide enough room for the - encoded string plus the trailing 0-byte - - - in both cases, *buffer_len is updated to - the size of the buffer /excluding/ the - trailing 0-byte - - */ - Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - - format++; - if (psize == NULL) { - Py_DECREF(s); - return converterr( - "(buffer_len is NULL)", - arg, msgbuf, bufsize); - } + if (psize != NULL) { if (*buffer == NULL) { *buffer = PyMem_NEW(char, size + 1); if (*buffer == NULL) { @@ -1080,7 +1126,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } } else { @@ -1114,7 +1160,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - "encoded string without null bytes", + flags, "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1125,7 +1171,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); - return converterr("(cleanup problem)", + return converterr(flags, "(cleanup problem)", arg, msgbuf, bufsize); } memcpy(*buffer, ptr, size+1); @@ -1136,29 +1182,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'S': { /* PyBytes object */ PyObject **p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; if (PyBytes_Check(arg)) *p = arg; else - return converterr("bytes", arg, msgbuf, bufsize); + return converterr(flags, "bytes", arg, msgbuf, bufsize); break; } case 'Y': { /* PyByteArray object */ PyObject **p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; if (PyByteArray_Check(arg)) *p = arg; else - return converterr("bytearray", arg, msgbuf, bufsize); + return converterr(flags, "bytearray", arg, msgbuf, bufsize); break; } case 'U': { /* PyUnicode object */ PyObject **p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; if (PyUnicode_Check(arg)) { *p = arg; } else - return converterr("str", arg, msgbuf, bufsize); + return converterr(flags, "str", arg, msgbuf, bufsize); break; } @@ -1169,10 +1218,11 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, type = va_arg(*p_va, PyTypeObject*); p = va_arg(*p_va, PyObject **); format++; + HANDLE_NULLABLE; if (PyType_IsSubtype(Py_TYPE(arg), type)) *p = arg; else - return converterr(type->tp_name, arg, msgbuf, bufsize); + return converterr(flags, type->tp_name, arg, msgbuf, bufsize); } else if (*format == '&') { @@ -1181,16 +1231,18 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, void *addr = va_arg(*p_va, void *); int res; format++; + HANDLE_NULLABLE; if (! (res = (*convert)(arg, addr))) - return converterr("(unspecified)", + return converterr(flags, "(unspecified)", arg, msgbuf, bufsize); if (res == Py_CLEANUP_SUPPORTED && addcleanup(addr, freelist, convert) == -1) - return converterr("(cleanup problem)", + return converterr(flags, "(cleanup problem)", arg, msgbuf, bufsize); } else { p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; *p = arg; } break; @@ -1202,29 +1254,30 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (*format != '*') return converterr( - "(invalid use of 'w' format character)", + flags, "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; + HANDLE_NULLABLE; /* Caller is interested in Py_buffer, and the object supports it directly. The request implicitly asks for PyBUF_SIMPLE, so the result is C-contiguous with format 'B'. */ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr("read-write bytes-like object", + return converterr(flags, "read-write bytes-like object", arg, msgbuf, bufsize); } assert(PyBuffer_IsContiguous((Py_buffer *)p, 'C')); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } break; } default: - return converterr("(impossible)", arg, msgbuf, bufsize); + return converterr(flags, "(impossible)", arg, msgbuf, bufsize); } @@ -2754,6 +2807,9 @@ skipitem(const char **p_format, va_list *p_va, int flags) return "impossible"; } + if (*format == '?') { + format++; + } *p_format = format; return NULL; From 19be6b792a026b9c2fd4ca667cf1ec340ddbc4c0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 3 Jul 2024 10:11:25 +0300 Subject: [PATCH 2/9] Add more tests. --- Lib/test/test_capi/test_getargs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index cc356cc6132a91..525173c7073a4f 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1402,7 +1402,14 @@ def check(format, arg, allows_none=False): check('Y', bytearray(b'bytearray')) check('O', object, allows_none=True) - # TODO: Not tested: es?, es#?, et?, et#?, O!, O& + parse((None,), {}, 'es?', ['a']) + parse((None,), {}, 'es#?', ['a']) + parse((None,), {}, 'et?', ['a']) + parse((None,), {}, 'et#?', ['a']) + parse((None,), {}, 'O!?', ['a']) + parse((None,), {}, 'O&?', ['a']) + + # TODO: More tests for es?, es#?, et?, et#?, O!, O& # TODO: Not implemented: (...)? @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') From cd90539cbdae85877fb1516c03fbe5a13978df65 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 3 Jul 2024 18:38:50 +0300 Subject: [PATCH 3/9] Add more tests. --- Lib/test/test_capi/test_getargs.py | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 525173c7073a4f..9490c78d328e6c 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1352,6 +1352,59 @@ def test_nested_tuple(self): "argument 1 must be sequence of length 1, not 0"): parse(((),), {}, '(' + f + ')', ['a']) + def test_specific_type_errors(self): + parse = _testcapi.parse_tuple_and_keywords + + def check(format, arg, expected, got='list'): + errmsg = f'must be {expected}, not {got}' + with self.assertRaisesRegex(TypeError, errmsg): + parse((arg,), {}, format, ['a']) + + check('k', [], 'int') + check('k?', [], 'int or None') + check('K', [], 'int') + check('K?', [], 'int or None') + check('c', [], 'a byte string of length 1') + check('c?', [], 'a byte string of length 1 or None') + check('c', b'abc', 'a byte string of length 1', + 'a bytes object of length 3') + check('c?', b'abc', 'a byte string of length 1 or None', + 'a bytes object of length 3') + check('c', bytearray(b'abc'), 'a byte string of length 1', + 'a bytearray object of length 3') + check('c?', bytearray(b'abc'), 'a byte string of length 1 or None', + 'a bytearray object of length 3') + check('C', [], 'a unicode character') + check('C?', [], 'a unicode character or None') + check('C', 'abc', 'a unicode character', + 'a string of length 3') + check('C?', 'abc', 'a unicode character or None', + 'a string of length 3') + check('s', [], 'str') + check('s?', [], 'str or None') + check('z', [], 'str or None') + check('z?', [], 'str or None') + check('es', [], 'str') + check('es?', [], 'str or None') + check('es#', [], 'str') + check('es#?', [], 'str or None') + check('et', [], 'str, bytes or bytearray') + check('et?', [], 'str, bytes or bytearray or None') + check('et#', [], 'str, bytes or bytearray') + check('et#?', [], 'str, bytes or bytearray or None') + check('w*', [], 'read-write bytes-like object') + check('w*?', [], 'read-write bytes-like object or None') + check('S', [], 'bytes') + check('S?', [], 'bytes or None') + check('U', [], 'str') + check('U?', [], 'str or None') + check('Y', [], 'bytearray') + check('Y?', [], 'bytearray or None') + check('(OO)', 42, '2-item sequence', 'int') + check('(OO)', (1, 2, 3), 'sequence of length 2', '3') + + # TODO: Not implemented: (...)? + def test_nullable(self): parse = _testcapi.parse_tuple_and_keywords From 75e8c5e4e0309fb8ba0f31931caffaf9f5bc402e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 4 Jul 2024 09:40:00 +0300 Subject: [PATCH 4/9] Implement (...)?. --- Lib/test/test_capi/test_getargs.py | 10 +++++++--- Python/getargs.c | 29 ++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 9490c78d328e6c..b16e2455af61e8 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1401,10 +1401,9 @@ def check(format, arg, expected, got='list'): check('Y', [], 'bytearray') check('Y?', [], 'bytearray or None') check('(OO)', 42, '2-item sequence', 'int') + check('(OO)?', 42, '2-item sequence or None', 'int') check('(OO)', (1, 2, 3), 'sequence of length 2', '3') - # TODO: Not implemented: (...)? - def test_nullable(self): parse = _testcapi.parse_tuple_and_keywords @@ -1455,6 +1454,12 @@ def check(format, arg, allows_none=False): check('Y', bytearray(b'bytearray')) check('O', object, allows_none=True) + check('(OO)', (1, 2)) + self.assertEqual(parse((((1, 2), 3),), {}, '((OO)?O)', ['a']), (1, 2, 3)) + self.assertEqual(parse(((None, 3),), {}, '((OO)?O)', ['a']), (NULL, NULL, 3)) + self.assertEqual(parse((((1, 2), 3),), {}, '((OO)O)', ['a']), (1, 2, 3)) + self.assertRaises(TypeError, parse, ((None, 3),), {}, '((OO)O)', ['a']) + parse((None,), {}, 'es?', ['a']) parse((None,), {}, 'es#?', ['a']) parse((None,), {}, 'et?', ['a']) @@ -1463,7 +1468,6 @@ def check(format, arg, allows_none=False): parse((None,), {}, 'O&?', ['a']) # TODO: More tests for es?, es#?, et?, et#?, O!, O& - # TODO: Not implemented: (...)? @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') def test_gh_119213(self): diff --git a/Python/getargs.c b/Python/getargs.c index b2aad4696b6c52..096be328cf1036 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -54,6 +54,7 @@ static const char *converttuple(PyObject *, const char **, va_list *, int, int *, char *, size_t, freelist_t *); static const char *convertsimple(PyObject *, const char **, va_list *, int, char *, size_t, freelist_t *); +static const char * converterr(int, const char *, PyObject *, char *, size_t); static Py_ssize_t convertbuffer(PyObject *, const void **p, const char **); static int getbuffer(PyObject *, Py_buffer *, const char**); @@ -467,6 +468,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, int i; Py_ssize_t len; + assert(*format == '('); + format++; for (;;) { int c = *format++; if (c == '(') { @@ -475,8 +478,12 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, level++; } else if (c == ')') { - if (level == 0) + if (level == 0) { + if (*format == '?') { + flags |= FLAG_NULLABLE; + } break; + } level--; } else if (c == ':' || c == ';' || c == '\0') @@ -485,6 +492,17 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, n++; } + if (arg == Py_None && (flags & FLAG_NULLABLE)) { + format = *p_format; + const char *msg = skipitem(&format, p_va, flags); + if (msg == NULL) { + *p_format = format; + } + else { + levels[0] = 0; + } + return msg; + } if (!PySequence_Check(arg) || PyBytes_Check(arg)) { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, @@ -505,7 +523,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } flags &= ~FLAG_NULLABLE; - format = *p_format; + format = *p_format + 1; for (i = 0; i < n; i++) { const char *msg; PyObject *item; @@ -527,6 +545,10 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } } + format++; + if (*format == '?') { + format++; + } *p_format = format; return NULL; } @@ -542,11 +564,8 @@ convertitem(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; if (*format == '(' /* ')' */) { - format++; msg = converttuple(arg, &format, p_va, flags, levels, msgbuf, bufsize, freelist); - if (msg == NULL) - format++; } else { msg = convertsimple(arg, &format, p_va, flags, From 02e05f61ca949babf2890bfcb5051e62f363c495 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 4 Jul 2024 09:46:44 +0300 Subject: [PATCH 5/9] Improve error message for et? and et#?. --- Lib/test/test_capi/test_getargs.py | 4 ++-- Python/getargs.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index b16e2455af61e8..75d49d071ef81b 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1389,9 +1389,9 @@ def check(format, arg, expected, got='list'): check('es#', [], 'str') check('es#?', [], 'str or None') check('et', [], 'str, bytes or bytearray') - check('et?', [], 'str, bytes or bytearray or None') + check('et?', [], 'str, bytes, bytearray or None') check('et#', [], 'str, bytes or bytearray') - check('et#?', [], 'str, bytes or bytearray or None') + check('et#?', [], 'str, bytes, bytearray or None') check('w*', [], 'read-write bytes-like object') check('w*?', [], 'read-write bytes-like object or None') check('S', [], 'bytes') diff --git a/Python/getargs.c b/Python/getargs.c index 096be328cf1036..4fcd5e88098f80 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1129,7 +1129,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - flags, recode_strings ? "str" : "str, bytes or bytearray", + flags, + recode_strings ? "str" : (flags & FLAG_NULLABLE) ? + "str, bytes, bytearray" : "str, bytes or bytearray", arg, msgbuf, bufsize); } From 9261aee2f1d86cb084528314a3b3ec8f922b25a5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 4 Jul 2024 11:35:28 +0300 Subject: [PATCH 6/9] Refactoring. --- Python/getargs.c | 110 +++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 57 deletions(-) diff --git a/Python/getargs.c b/Python/getargs.c index 4fcd5e88098f80..7905fc710824b7 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1,6 +1,8 @@ /* New getargs implementation */ +#include + #define PY_CXX_CONST const #include "Python.h" #include "pycore_abstract.h" // _PyNumber_Index() @@ -21,7 +23,6 @@ PyAPI_FUNC(int) _PyArg_VaParseTupleAndKeywords_SizeT(PyObject *, PyObject *, const char *, const char * const *, va_list); #define FLAG_COMPAT 1 -#define FLAG_NULLABLE 4 typedef int (*destr_t)(PyObject *, void *); @@ -54,7 +55,6 @@ static const char *converttuple(PyObject *, const char **, va_list *, int, int *, char *, size_t, freelist_t *); static const char *convertsimple(PyObject *, const char **, va_list *, int, char *, size_t, freelist_t *); -static const char * converterr(int, const char *, PyObject *, char *, size_t); static Py_ssize_t convertbuffer(PyObject *, const void **p, const char **); static int getbuffer(PyObject *, Py_buffer *, const char**); @@ -467,6 +467,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; int i; Py_ssize_t len; + bool nullable = false; assert(*format == '('); format++; @@ -480,7 +481,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, else if (c == ')') { if (level == 0) { if (*format == '?') { - flags |= FLAG_NULLABLE; + nullable = true; } break; } @@ -492,13 +493,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, n++; } - if (arg == Py_None && (flags & FLAG_NULLABLE)) { - format = *p_format; - const char *msg = skipitem(&format, p_va, flags); - if (msg == NULL) { - *p_format = format; - } - else { + if (arg == Py_None && nullable) { + const char *msg = skipitem(p_format, p_va, flags); + if (msg != NULL) { levels[0] = 0; } return msg; @@ -508,7 +505,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, PyOS_snprintf(msgbuf, bufsize, "must be %d-item sequence%s, not %.50s", n, - (flags & FLAG_NULLABLE) ? " or None" : "", + nullable ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } @@ -522,7 +519,6 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return msgbuf; } - flags &= ~FLAG_NULLABLE; format = *p_format + 1; for (i = 0; i < n; i++) { const char *msg; @@ -595,7 +591,7 @@ _PyArg_BadArgument(const char *fname, const char *displayname, } static const char * -converterr(int flags, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) +converterr(bool nullable, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) { assert(expected != NULL); assert(arg != NULL); @@ -606,23 +602,21 @@ converterr(int flags, const char *expected, PyObject *arg, char *msgbuf, size_t else { PyOS_snprintf(msgbuf, bufsize, "must be %.50s%s, not %.50s", expected, - ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) - ? " or None" : "", + nullable ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); } return msgbuf; } static const char * -convertcharerr(int flags, const char *expected, const char *what, Py_ssize_t size, +convertcharerr(bool nullable, const char *expected, const char *what, Py_ssize_t size, char *msgbuf, size_t bufsize) { assert(expected != NULL); PyOS_snprintf(msgbuf, bufsize, "must be %.50s%s, not %.50s of length %zd", expected, - ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) - ? " or None" : "", + nullable ? " or None" : "", what, size); return msgbuf; } @@ -649,13 +643,14 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (arg == Py_None) { \ break; \ } \ - flags |= FLAG_NULLABLE; \ + nullable = true; \ } const char *format = *p_format; char c = *format++; const char *sarg; + bool nullable = false; switch (c) { @@ -791,7 +786,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongMask(arg); else - return converterr(flags, "int", arg, msgbuf, bufsize); + return converterr(nullable, "int", arg, msgbuf, bufsize); *p = ival; break; } @@ -814,7 +809,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongLongMask(arg); else - return converterr(flags, "int", arg, msgbuf, bufsize); + return converterr(nullable, "int", arg, msgbuf, bufsize); *p = ival; break; } @@ -858,7 +853,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, HANDLE_NULLABLE; if (PyBytes_Check(arg)) { if (PyBytes_GET_SIZE(arg) != 1) { - return convertcharerr(flags, "a byte string of length 1", + return convertcharerr(nullable, "a byte string of length 1", "a bytes object", PyBytes_GET_SIZE(arg), msgbuf, bufsize); } @@ -866,14 +861,14 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (PyByteArray_Check(arg)) { if (PyByteArray_GET_SIZE(arg) != 1) { - return convertcharerr(flags, "a byte string of length 1", + return convertcharerr(nullable, "a byte string of length 1", "a bytearray object", PyByteArray_GET_SIZE(arg), msgbuf, bufsize); } *p = PyByteArray_AS_STRING(arg)[0]; } else - return converterr(flags, "a byte string of length 1", arg, msgbuf, bufsize); + return converterr(nullable, "a byte string of length 1", arg, msgbuf, bufsize); break; } @@ -884,10 +879,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const void *data; if (!PyUnicode_Check(arg)) - return converterr(flags, "a unicode character", arg, msgbuf, bufsize); + return converterr(nullable, "a unicode character", arg, msgbuf, bufsize); if (PyUnicode_GET_LENGTH(arg) != 1) { - return convertcharerr(flags, "a unicode character", + return convertcharerr(nullable, "a unicode character", "a string", PyUnicode_GET_LENGTH(arg), msgbuf, bufsize); } @@ -922,10 +917,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, format++; HANDLE_NULLABLE; if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } break; @@ -936,14 +931,14 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, HANDLE_NULLABLE; count = convertbuffer(arg, (const void **)p, &buf); if (count < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); *psize = count; } else { HANDLE_NULLABLE; count = convertbuffer(arg, (const void **)p, &buf); if (count < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); if (strlen(*p) != (size_t)count) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); RETURN_ERR_OCCURRED; @@ -967,18 +962,18 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(flags, CONV_UNICODE, + return converterr(nullable, CONV_UNICODE, arg, msgbuf, bufsize); PyBuffer_FillInfo(p, arg, (void *)sarg, len, 1, 0); } else { /* any bytes-like object */ const char *buf; if (getbuffer(arg, p, &buf) < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); } if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } } else if (*format == '#') { /* a string or read-only bytes-like object */ @@ -996,7 +991,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(flags, CONV_UNICODE, + return converterr(nullable, CONV_UNICODE, arg, msgbuf, bufsize); *p = sarg; *psize = len; @@ -1006,7 +1001,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); if (count < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); *psize = count; } } else { @@ -1021,7 +1016,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, else if (PyUnicode_Check(arg)) { sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(flags, CONV_UNICODE, + return converterr(nullable, CONV_UNICODE, arg, msgbuf, bufsize); if (strlen(sarg) != (size_t)len) { PyErr_SetString(PyExc_ValueError, "embedded null character"); @@ -1030,7 +1025,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = sarg; } else - return converterr(flags, c == 'z' ? "str or None" : "str", + return converterr(c == 'z' || nullable, "str", arg, msgbuf, bufsize); } break; @@ -1059,12 +1054,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, recode_strings = 0; else return converterr( - flags, "(unknown parser marker combination)", + nullable, "(unknown parser marker combination)", arg, msgbuf, bufsize); buffer = (char **)va_arg(*p_va, char **); format++; if (buffer == NULL) - return converterr(flags, "(buffer is NULL)", + return converterr(nullable, "(buffer is NULL)", arg, msgbuf, bufsize); Py_ssize_t *psize = NULL; if (*format == '#') { @@ -1094,7 +1089,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, format++; if (psize == NULL) { return converterr( - flags, "(buffer_len is NULL)", + nullable, "(buffer_len is NULL)", arg, msgbuf, bufsize); } } @@ -1119,7 +1114,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, encoding, NULL); if (s == NULL) - return converterr(flags, "(encoding failed)", + return converterr(nullable, "(encoding failed)", arg, msgbuf, bufsize); assert(PyBytes_Check(s)); size = PyBytes_GET_SIZE(s); @@ -1129,9 +1124,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - flags, - recode_strings ? "str" : (flags & FLAG_NULLABLE) ? - "str, bytes, bytearray" : "str, bytes or bytearray", + nullable, + recode_strings ? "str" + : nullable ? "str, bytes, bytearray" + : "str, bytes or bytearray", arg, msgbuf, bufsize); } @@ -1147,7 +1143,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } } else { @@ -1181,7 +1177,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - flags, "encoded string without null bytes", + nullable, "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1192,7 +1188,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); - return converterr(flags, "(cleanup problem)", + return converterr(nullable, "(cleanup problem)", arg, msgbuf, bufsize); } memcpy(*buffer, ptr, size+1); @@ -1207,7 +1203,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyBytes_Check(arg)) *p = arg; else - return converterr(flags, "bytes", arg, msgbuf, bufsize); + return converterr(nullable, "bytes", arg, msgbuf, bufsize); break; } @@ -1217,7 +1213,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyByteArray_Check(arg)) *p = arg; else - return converterr(flags, "bytearray", arg, msgbuf, bufsize); + return converterr(nullable, "bytearray", arg, msgbuf, bufsize); break; } @@ -1228,7 +1224,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = arg; } else - return converterr(flags, "str", arg, msgbuf, bufsize); + return converterr(nullable, "str", arg, msgbuf, bufsize); break; } @@ -1243,7 +1239,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyType_IsSubtype(Py_TYPE(arg), type)) *p = arg; else - return converterr(flags, type->tp_name, arg, msgbuf, bufsize); + return converterr(nullable, type->tp_name, arg, msgbuf, bufsize); } else if (*format == '&') { @@ -1254,11 +1250,11 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, format++; HANDLE_NULLABLE; if (! (res = (*convert)(arg, addr))) - return converterr(flags, "(unspecified)", + return converterr(nullable, "(unspecified)", arg, msgbuf, bufsize); if (res == Py_CLEANUP_SUPPORTED && addcleanup(addr, freelist, convert) == -1) - return converterr(flags, "(cleanup problem)", + return converterr(nullable, "(cleanup problem)", arg, msgbuf, bufsize); } else { @@ -1275,7 +1271,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (*format != '*') return converterr( - flags, "(invalid use of 'w' format character)", + nullable, "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; HANDLE_NULLABLE; @@ -1285,20 +1281,20 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, result is C-contiguous with format 'B'. */ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr(flags, "read-write bytes-like object", + return converterr(nullable, "read-write bytes-like object", arg, msgbuf, bufsize); } assert(PyBuffer_IsContiguous((Py_buffer *)p, 'C')); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } break; } default: - return converterr(flags, "(impossible)", arg, msgbuf, bufsize); + return converterr(nullable, "(impossible)", arg, msgbuf, bufsize); } From 8df697e3a86fafdf15cf442c8ba391a4e4d65c29 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 9 Jan 2025 11:07:17 +0200 Subject: [PATCH 7/9] Add more examples. --- Modules/_interpretersmodule.c | 9 +++------ Modules/_threadmodule.c | 4 ++-- Modules/mmapmodule.c | 12 +++--------- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index a36823c4bb982b..03c29e9ae2350d 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1245,14 +1245,11 @@ interp_get_config(PyObject *self, PyObject *args, PyObject *kwds) PyObject *idobj = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O|$p:get_config", kwlist, + "O?|$p:get_config", kwlist, &idobj, &restricted)) { return NULL; } - if (idobj == Py_None) { - idobj = NULL; - } int reqready = 0; PyInterpreterState *interp = \ @@ -1369,14 +1366,14 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"exc", NULL}; PyObject *exc_arg = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "|O:capture_exception", kwlist, + "|O?:capture_exception", kwlist, &exc_arg)) { return NULL; } PyObject *exc = exc_arg; - if (exc == NULL || exc == Py_None) { + if (exc == NULL) { exc = PyErr_GetRaisedException(); if (exc == NULL) { Py_RETURN_NONE; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index fd0ff279d9c5a6..56a568f6dbd920 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -641,12 +641,12 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args) PyThreadHandleObject *self = (PyThreadHandleObject*)op; PyObject *timeout_obj = NULL; - if (!PyArg_ParseTuple(args, "|O:join", &timeout_obj)) { + if (!PyArg_ParseTuple(args, "|O?:join", &timeout_obj)) { return NULL; } PyTime_t timeout_ns = -1; - if (timeout_obj != NULL && timeout_obj != Py_None) { + if (timeout_obj != NULL) { if (_PyTime_FromSecondsObject(&timeout_ns, timeout_obj, _PyTime_ROUND_TIMEOUT) < 0) { return NULL; diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index c7a730bcea237e..ad45ffbac76424 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -1689,7 +1689,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) DWORD off_lo; /* lower 32 bits of offset */ DWORD size_hi; /* upper 32 bits of size */ DWORD size_lo; /* lower 32 bits of size */ - PyObject *tagname = Py_None; + PyObject *tagname = NULL; DWORD dwErr = 0; int fileno; HANDLE fh = 0; @@ -1699,7 +1699,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "tagname", "access", "offset", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|OiL", keywords, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|U?iL", keywords, &fileno, &map_size, &tagname, &access, &offset)) { return NULL; @@ -1832,13 +1832,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) m_obj->weakreflist = NULL; m_obj->exports = 0; /* set the tag name */ - if (!Py_IsNone(tagname)) { - if (!PyUnicode_Check(tagname)) { - Py_DECREF(m_obj); - return PyErr_Format(PyExc_TypeError, "expected str or None for " - "'tagname', not %.200s", - Py_TYPE(tagname)->tp_name); - } + if (tagname != NULL) { m_obj->tagname = PyUnicode_AsWideCharString(tagname, NULL); if (m_obj->tagname == NULL) { Py_DECREF(m_obj); From 7c1c3354456227602871058869de04fd8c959dd5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Apr 2025 15:03:46 +0300 Subject: [PATCH 8/9] Fix test for changed error message. --- Lib/test/test_mmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index b2a299ed172967..fd4197b7086976 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -732,7 +732,7 @@ def test_tagname(self): m2.close() m1.close() - with self.assertRaisesRegex(TypeError, 'tagname'): + with self.assertRaisesRegex(TypeError, 'must be str or None'): mmap.mmap(-1, 8, tagname=1) @cpython_only From d30e8094015d6003671c1f1760786d73b333eb9a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Apr 2025 15:33:03 +0300 Subject: [PATCH 9/9] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/c-api/arg.rst | 2 +- .../C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst | 2 +- Modules/_ctypes/_ctypes.c | 3 +-- Python/getargs.c | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 81dc7bd3e3fd70..0375bc57b6cf1e 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -375,7 +375,7 @@ Other objects If the argument is not ``None``, it is parsed according to the specified format unit. - .. versionadded:: 3.14 + .. versionadded:: next A few other characters have a meaning in a format string. These may not occur inside nested parentheses. They are: diff --git a/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst b/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst index 6022eb4306581f..d49b1735825e8a 100644 --- a/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst +++ b/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst @@ -1,3 +1,3 @@ Add support of nullable arguments in :c:func:`PyArg_Parse` and similar -functions. Adding ``?`` after any format unit makea ``None`` be accepted as +functions. Adding ``?`` after any format unit makes ``None`` be accepted as a value. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index facc500ee10994..55e5eee0eb081a 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3848,8 +3848,7 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) - { + if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); return 0; diff --git a/Python/getargs.c b/Python/getargs.c index 6dfe64f7fc0826..53cc910a26d5c9 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -505,9 +505,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, "must be %d-item sequence%s, not %.50s", - n, - nullable ? " or None" : "", - arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); + n, + nullable ? " or None" : "", + arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy