diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 339ee35c7aa474..99afebd762a456 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -477,9 +477,6 @@ APIs: | | | :c:func:`PyObject_Repr`. | +-------------------+---------------------+----------------------------------+ - An unrecognized format character causes all the rest of the format string to be - copied as-is to the result string, and any extra arguments discarded. - .. note:: The width formatter unit is number of characters rather than bytes. The precision formatter unit is number of bytes for ``"%s"`` and @@ -500,6 +497,11 @@ APIs: Support width and precision formatter for ``"%s"``, ``"%A"``, ``"%U"``, ``"%V"``, ``"%S"``, ``"%R"`` added. + .. versionchanged:: 3.12 + An unrecognized format character now sets a :exc:`SystemError`. + In previous versions it caused all the rest of the format string to be + copied as-is to the result string, and any extra arguments discarded. + .. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index ddf9e1f6a59b47..02215524cfd671 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -461,6 +461,12 @@ Porting to Python 3.12 :py:meth:`~class.__subclasses__` (using :c:func:`PyObject_CallMethod`, for example). +* An unrecognized format character in :c:func:`PyUnicode_FromFormat` and + :c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`. + In previous versions it caused all the rest of the format string to be + copied as-is to the result string, and any extra arguments discarded. + (Contributed by Serhiy Storchaka in :gh:`95781`.) + Deprecated ---------- diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 9765ed97a60a44..63bccb72e04646 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2641,8 +2641,6 @@ def check_format(expected, format, *args): b'%c%c', c_int(0x10000), c_int(0x100000)) # test "%" - check_format('%', - b'%') check_format('%', b'%%') check_format('%s', @@ -2819,23 +2817,22 @@ def check_format(expected, format, *args): check_format('repr=abc\ufffd', b'repr=%V', None, b'abc\xff') - # not supported: copy the raw format string. these tests are just here - # to check for crashes and should not be considered as specifications - check_format('%s', - b'%1%s', b'abc') - check_format('%1abc', - b'%1abc') - check_format('%+i', - b'%+i', c_int(10)) - check_format('%.%s', - b'%.%s', b'abc') - # Issue #33817: empty strings check_format('', b'') check_format('', b'%s', b'') + # check for crashes + for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1', + b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc', + b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'): + with self.subTest(fmt=fmt): + self.assertRaisesRegex(SystemError, 'invalid format string', + PyUnicode_FromFormat, fmt, b'abc') + self.assertRaisesRegex(SystemError, 'invalid format string', + PyUnicode_FromFormat, b'%+i', c_int(10)) + # Test PyUnicode_AsWideChar() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') diff --git a/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst b/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst new file mode 100644 index 00000000000000..eb2fd7e9da3d81 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2022-08-08-14-36-31.gh-issue-95781.W_G8YW.rst @@ -0,0 +1,4 @@ +An unrecognized format character in :c:func:`PyUnicode_FromFormat` and +:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`. +In previous versions it caused all the rest of the format string to be +copied as-is to the result string, and any extra arguments discarded. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7ff79953257ee6..184a2bfd5dd869 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2355,6 +2355,13 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, p = f; f++; + if (*f == '%') { + if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) + return NULL; + f++; + return f; + } + zeropad = 0; if (*f == '0') { zeropad = 1; @@ -2392,14 +2399,6 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, f++; } } - if (*f == '%') { - /* "%.3%s" => f points to "3" */ - f--; - } - } - if (*f == '\0') { - /* bogus format "%.123" => go backward, f points to "3" */ - f--; } /* Handle %ld, %lu, %lld and %llu. */ @@ -2423,7 +2422,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, ++f; } - if (f[1] == '\0') + if (f[0] != '\0' && f[1] == '\0') writer->overallocate = 0; switch (*f) { @@ -2616,21 +2615,9 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, break; } - case '%': - if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) - return NULL; - break; - default: - /* if we stumble upon an unknown formatting code, copy the rest - of the format string to the output string. (we cannot just - skip the code, since there's no way to know what's in the - argument list) */ - len = strlen(p); - if (_PyUnicodeWriter_WriteLatin1String(writer, p, len) == -1) - return NULL; - f = p+len; - return f; + PyErr_Format(PyExc_SystemError, "invalid format string: %s", p); + return NULL; } f++;
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: