diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 28398896467898..2ee4e64d635303 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -720,6 +720,13 @@ def test_isidentifier(self): self.assertFalse("©".isidentifier()) self.assertFalse("0".isidentifier()) + @support.cpython_only + def test_isidentifier_legacy(self): + import _testcapi + u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊' + self.assertTrue(u.isidentifier()) + self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier()) + def test_isprintable(self): self.assertTrue("".isprintable()) self.assertTrue(" ".isprintable()) diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst b/Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst new file mode 100644 index 00000000000000..1252db4dc9848d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-05-11-20-53-52.bpo-40596.dwOH_X.rst @@ -0,0 +1,2 @@ +Fixed :meth:`str.isidentifier` for non-canonicalized strings containing +non-BMP characters on Windows. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 276547ca48a5b2..826298c23a924c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12356,20 +12356,38 @@ PyUnicode_IsIdentifier(PyObject *self) return len && i == len; } else { - Py_ssize_t i, len = PyUnicode_GET_SIZE(self); + Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self); if (len == 0) { /* an empty string is not a valid identifier */ return 0; } const wchar_t *wstr = _PyUnicode_WSTR(self); - Py_UCS4 ch = wstr[0]; + Py_UCS4 ch = wstr[i++]; +#if SIZEOF_WCHAR_T == 2 + if (Py_UNICODE_IS_HIGH_SURROGATE(ch) + && i < len + && Py_UNICODE_IS_LOW_SURROGATE(wstr[i])) + { + ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]); + i++; + } +#endif if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) { return 0; } - for (i = 1; i < len; i++) { - ch = wstr[i]; + while (i < len) { + ch = wstr[i++]; +#if SIZEOF_WCHAR_T == 2 + if (Py_UNICODE_IS_HIGH_SURROGATE(ch) + && i < len + && Py_UNICODE_IS_LOW_SURROGATE(wstr[i])) + { + ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]); + i++; + } +#endif if (!_PyUnicode_IsXidContinue(ch)) { return 0; }
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: