From b239ae5762604f77fcaac7cb81263a31dac560d0 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 7 Oct 2022 13:10:24 +0300 Subject: [PATCH 1/9] gh-97982: Reuse `PyUnicode_Count` in `unicode_count` --- ...2-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst | 2 + Objects/unicodeobject.c | 51 +------------------ 2 files changed, 3 insertions(+), 50 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst new file mode 100644 index 00000000000000..0084d23af6fc39 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst @@ -0,0 +1,2 @@ +Reuse ``PyUnicode_Count`` function in ``unicode_count`` in +``unicodeobject.c``. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bd169ed714212d..cb465a0e22a8da 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10858,60 +10858,11 @@ unicode_count(PyObject *self, PyObject *args) Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; PyObject *result; - int kind1, kind2; - const void *buf1, *buf2; - Py_ssize_t len1, len2, iresult; if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) return NULL; - kind1 = PyUnicode_KIND(self); - kind2 = PyUnicode_KIND(substring); - if (kind1 < kind2) - return PyLong_FromLong(0); - - len1 = PyUnicode_GET_LENGTH(self); - len2 = PyUnicode_GET_LENGTH(substring); - ADJUST_INDICES(start, end, len1); - if (end - start < len2) - return PyLong_FromLong(0); - - buf1 = PyUnicode_DATA(self); - buf2 = PyUnicode_DATA(substring); - if (kind2 != kind1) { - buf2 = unicode_askind(kind2, buf2, len2, kind1); - if (!buf2) - return NULL; - } - switch (kind1) { - case PyUnicode_1BYTE_KIND: - iresult = ucs1lib_count( - ((const Py_UCS1*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_2BYTE_KIND: - iresult = ucs2lib_count( - ((const Py_UCS2*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_4BYTE_KIND: - iresult = ucs4lib_count( - ((const Py_UCS4*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - default: - Py_UNREACHABLE(); - } - - result = PyLong_FromSsize_t(iresult); - - assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring))); - if (kind2 != kind1) - PyMem_Free((void *)buf2); - + result = PyLong_FromSsize_t(PyUnicode_Count(self, substring, start, end)); return result; } From e9d63589165daee34efaf76b33d31b297fdaa484 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 7 Oct 2022 13:29:38 +0300 Subject: [PATCH 2/9] Address review --- Objects/unicodeobject.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cb465a0e22a8da..7bf77525112deb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8964,21 +8964,19 @@ _PyUnicode_InsertThousandsGrouping( return count; } - -Py_ssize_t -PyUnicode_Count(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end) +static Py_ssize_t +any_unicode_count(PyObject *str, + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end) { + // You must ensure that `str` and `substr` are both unicode objects + // before calling this function. Py_ssize_t result; int kind1, kind2; const void *buf1 = NULL, *buf2 = NULL; Py_ssize_t len1, len2; - if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) - return -1; - kind1 = PyUnicode_KIND(str); kind2 = PyUnicode_KIND(substr); if (kind1 < kind2) @@ -9039,6 +9037,18 @@ PyUnicode_Count(PyObject *str, return -1; } +Py_ssize_t +PyUnicode_Count(PyObject *str, + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end) +{ + if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) + return -1; + + return any_unicode_count(str, substr, start, end); +} + Py_ssize_t PyUnicode_Find(PyObject *str, PyObject *substr, @@ -10858,11 +10868,16 @@ unicode_count(PyObject *self, PyObject *args) Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; PyObject *result; + Py_ssize_t iresult; if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) return NULL; - result = PyLong_FromSsize_t(PyUnicode_Count(self, substring, start, end)); + iresult = any_unicode_count(self, substring, start, end); + if (iresult == -1) + return NULL; + + result = PyLong_FromSsize_t(iresult); return result; } From 886d6e388127c7d33b7b4e2027571c573ed35e28 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 7 Oct 2022 13:44:56 +0300 Subject: [PATCH 3/9] Update 2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst --- .../2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst index 0084d23af6fc39..69654a8b1431c0 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst @@ -1,2 +1,3 @@ -Reuse ``PyUnicode_Count`` function in ``unicode_count`` in +Create ``any_unicode_count`` private helper for +both ``PyUnicode_Count`` and ``unicode_count`` in ``unicodeobject.c``. From 482010ac48f42a89619f585fb69c589be47a005a Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 7 Oct 2022 14:07:23 +0300 Subject: [PATCH 4/9] Update 2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst --- .../2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst index 69654a8b1431c0..983ada814b1b20 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst @@ -1,3 +1,3 @@ -Create ``any_unicode_count`` private helper for +Create ``any_unicode_count`` private helper for both ``PyUnicode_Count`` and ``unicode_count`` in ``unicodeobject.c``. From 4b2447e7233d714b0462cf7944aee349a27b6059 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 7 Oct 2022 14:41:14 +0300 Subject: [PATCH 5/9] Use `assert()` for unicode objects --- Lib/test/test_unicode.py | 10 ++++++++++ Objects/unicodeobject.c | 5 +++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 30faaaf83bec96..edb92f106cf696 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -241,6 +241,10 @@ def test_count(self): self.checkequal(0, 'a' * 10, 'count', 'a\u0102') self.checkequal(0, 'a' * 10, 'count', 'a\U00100304') self.checkequal(0, '\u0102' * 10, 'count', '\u0102\U00100304') + # test subclass + class MyStr(str): + pass + self.checkequal(3, MyStr('aaa'), 'count', 'a') def test_find(self): string_tests.CommonTest.test_find(self) @@ -2983,6 +2987,12 @@ def test_count(self): self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1) self.assertEqual(unicode_count(st, ch, 0, len(st)), 0) + # subclasses should still work + class MyStr(str): + pass + + self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2) + # Test PyUnicode_FindChar() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7bf77525112deb..875de97a51c5a6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8970,8 +8970,9 @@ any_unicode_count(PyObject *str, Py_ssize_t start, Py_ssize_t end) { - // You must ensure that `str` and `substr` are both unicode objects - // before calling this function. + assert(PyUnicode_Check(str)); + assert(PyUnicode_Check(substr)); + Py_ssize_t result; int kind1, kind2; const void *buf1 = NULL, *buf2 = NULL; From 35ecc77deb4c38f082842ff777f1af67730236a7 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Sat, 8 Oct 2022 12:03:50 +0300 Subject: [PATCH 6/9] Address review --- ...2-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst | 3 --- Objects/unicodeobject.c | 20 +++++++++---------- 2 files changed, 9 insertions(+), 14 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst deleted file mode 100644 index 983ada814b1b20..00000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst +++ /dev/null @@ -1,3 +0,0 @@ -Create ``any_unicode_count`` private helper for -both ``PyUnicode_Count`` and ``unicode_count`` in -``unicodeobject.c``. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 875de97a51c5a6..469c348eea68e5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8965,10 +8965,10 @@ _PyUnicode_InsertThousandsGrouping( } static Py_ssize_t -any_unicode_count(PyObject *str, - PyObject *substr, - Py_ssize_t start, - Py_ssize_t end) +unicode_count_impl(PyObject *str, + PyObject *substr, + Py_ssize_t start, + Py_ssize_t end) { assert(PyUnicode_Check(str)); assert(PyUnicode_Check(substr)); @@ -9047,7 +9047,7 @@ PyUnicode_Count(PyObject *str, if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) return -1; - return any_unicode_count(str, substr, start, end); + return unicode_count_impl(str, substr, start, end); } Py_ssize_t @@ -10868,18 +10868,16 @@ unicode_count(PyObject *self, PyObject *args) PyObject *substring = NULL; /* initialize to fix a compiler warning */ Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *result; - Py_ssize_t iresult; + Py_ssize_t result; if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) return NULL; - iresult = any_unicode_count(self, substring, start, end); - if (iresult == -1) + result = unicode_count_impl(self, substring, start, end); + if (result == -1) return NULL; - result = PyLong_FromSsize_t(iresult); - return result; + return PyLong_FromSsize_t(result); } /*[clinic input] From 2dabc73cc3df589be13c827269f9ef66924ce569 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 12 Oct 2022 18:19:20 +0300 Subject: [PATCH 7/9] Use `anylib_count` in `unicode_count_impl` --- Objects/unicodeobject.c | 56 ++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ee7f87367897f5..93ac46985d3f2f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8964,6 +8964,21 @@ _PyUnicode_InsertThousandsGrouping( return count; } +static Py_ssize_t +anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen, + PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) +{ + switch (kind) { + case PyUnicode_1BYTE_KIND: + return ucs1lib_count(sbuf, slen, buf1, len1, maxcount); + case PyUnicode_2BYTE_KIND: + return ucs2lib_count(sbuf, slen, buf1, len1, maxcount); + case PyUnicode_4BYTE_KIND: + return ucs4lib_count(sbuf, slen, buf1, len1, maxcount); + } + Py_UNREACHABLE(); +} + static Py_ssize_t unicode_count_impl(PyObject *str, PyObject *substr, @@ -8997,28 +9012,10 @@ unicode_count_impl(PyObject *str, goto onError; } - switch (kind1) { - case PyUnicode_1BYTE_KIND: - result = ucs1lib_count( - ((const Py_UCS1*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_2BYTE_KIND: - result = ucs2lib_count( - ((const Py_UCS2*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - case PyUnicode_4BYTE_KIND: - result = ucs4lib_count( - ((const Py_UCS4*)buf1) + start, end - start, - buf2, len2, PY_SSIZE_T_MAX - ); - break; - default: - Py_UNREACHABLE(); - } + result = anylib_count(kind1, + str, buf1 + start, end - start, + substr, buf2, len2, + PY_SSIZE_T_MAX); assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); if (kind2 != kind1) @@ -9903,21 +9900,6 @@ anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1, Py_UNREACHABLE(); } -static Py_ssize_t -anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen, - PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) -{ - switch (kind) { - case PyUnicode_1BYTE_KIND: - return ucs1lib_count(sbuf, slen, buf1, len1, maxcount); - case PyUnicode_2BYTE_KIND: - return ucs2lib_count(sbuf, slen, buf1, len1, maxcount); - case PyUnicode_4BYTE_KIND: - return ucs4lib_count(sbuf, slen, buf1, len1, maxcount); - } - Py_UNREACHABLE(); -} - static void replace_1char_inplace(PyObject *u, Py_ssize_t pos, Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) From 74f8c1c564bd3d3ac28dd05faa6053980b8e4f95 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 12 Oct 2022 19:00:55 +0300 Subject: [PATCH 8/9] Revert "Use `anylib_count` in `unicode_count_impl`" This reverts commit 2dabc73cc3df589be13c827269f9ef66924ce569. --- Objects/unicodeobject.c | 56 +++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 93ac46985d3f2f..ee7f87367897f5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8964,21 +8964,6 @@ _PyUnicode_InsertThousandsGrouping( return count; } -static Py_ssize_t -anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen, - PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) -{ - switch (kind) { - case PyUnicode_1BYTE_KIND: - return ucs1lib_count(sbuf, slen, buf1, len1, maxcount); - case PyUnicode_2BYTE_KIND: - return ucs2lib_count(sbuf, slen, buf1, len1, maxcount); - case PyUnicode_4BYTE_KIND: - return ucs4lib_count(sbuf, slen, buf1, len1, maxcount); - } - Py_UNREACHABLE(); -} - static Py_ssize_t unicode_count_impl(PyObject *str, PyObject *substr, @@ -9012,10 +8997,28 @@ unicode_count_impl(PyObject *str, goto onError; } - result = anylib_count(kind1, - str, buf1 + start, end - start, - substr, buf2, len2, - PY_SSIZE_T_MAX); + switch (kind1) { + case PyUnicode_1BYTE_KIND: + result = ucs1lib_count( + ((const Py_UCS1*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_2BYTE_KIND: + result = ucs2lib_count( + ((const Py_UCS2*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + case PyUnicode_4BYTE_KIND: + result = ucs4lib_count( + ((const Py_UCS4*)buf1) + start, end - start, + buf2, len2, PY_SSIZE_T_MAX + ); + break; + default: + Py_UNREACHABLE(); + } assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); if (kind2 != kind1) @@ -9900,6 +9903,21 @@ anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1, Py_UNREACHABLE(); } +static Py_ssize_t +anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen, + PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) +{ + switch (kind) { + case PyUnicode_1BYTE_KIND: + return ucs1lib_count(sbuf, slen, buf1, len1, maxcount); + case PyUnicode_2BYTE_KIND: + return ucs2lib_count(sbuf, slen, buf1, len1, maxcount); + case PyUnicode_4BYTE_KIND: + return ucs4lib_count(sbuf, slen, buf1, len1, maxcount); + } + Py_UNREACHABLE(); +} + static void replace_1char_inplace(PyObject *u, Py_ssize_t pos, Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) From 69a94e2cc0d0a13de01e9acd2de69d49c7222d58 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 12 Oct 2022 19:01:39 +0300 Subject: [PATCH 9/9] Add a comment about `anylib_count` --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ee7f87367897f5..5b737f1b596e3e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8997,6 +8997,7 @@ unicode_count_impl(PyObject *str, goto onError; } + // We don't reuse `anylib_count` here because of the explicit casts. switch (kind1) { case PyUnicode_1BYTE_KIND: result = ucs1lib_count( pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy