From 7f5362fa458d87f7d1a80e6ae5d63b017533a17f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 13 Aug 2018 12:51:09 +0300 Subject: [PATCH 1/2] gh-99593: Add tests for Unicode C API (part 1) Add tests for functions corresponding to the str class methods. --- Lib/test/test_capi/test_unicode.py | 483 ++++++++++++++++-- ...2-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst | 1 + Modules/_testcapi/unicode.c | 338 +++++++++++- 3 files changed, 772 insertions(+), 50 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 07b77d3e04bbe0..f09a8fc22e3d5c 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -9,10 +9,26 @@ _testcapi = None +NULL = None + class CAPITest(unittest.TestCase): - # Test PyUnicode_FromFormat() + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromobject(self): + """Test PyUnicode_FromObject()""" + from _testcapi import unicode_fromobject as fromobject + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + self.assertEqual(fromobject(s), s) + + self.assertRaises(TypeError, fromobject, b'abc') + self.assertRaises(TypeError, fromobject, []) + # CRASHES fromobject(NULL) + def test_from_format(self): + """Test PyUnicode_FromFormat()""" import_helper.import_module('ctypes') from ctypes import ( c_char_p, @@ -268,10 +284,10 @@ def check_format(expected, format, *args): self.assertRaisesRegex(SystemError, 'invalid format string', PyUnicode_FromFormat, b'%+i', c_int(10)) - # Test PyUnicode_AsWideChar() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidechar(self): + """Test PyUnicode_AsWideChar()""" from _testcapi import unicode_aswidechar import_helper.import_module('ctypes') from ctypes import c_wchar, sizeof @@ -307,10 +323,10 @@ def test_aswidechar(self): self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') - # Test PyUnicode_AsWideCharString() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidecharstring(self): + """Test PyUnicode_AsWideCharString()""" from _testcapi import unicode_aswidecharstring import_helper.import_module('ctypes') from ctypes import c_wchar, sizeof @@ -332,10 +348,10 @@ def test_aswidecharstring(self): self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') - # Test PyUnicode_AsUCS4() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asucs4(self): + """Test PyUnicode_AsUCS4()""" from _testcapi import unicode_asucs4 for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', 'a\ud800b\udfffc', '\ud834\udd1e']: @@ -350,10 +366,10 @@ def test_asucs4(self): self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff') - # Test PyUnicode_AsUTF8() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8(self): + """Test PyUnicode_AsUTF8()""" from _testcapi import unicode_asutf8 bmp = '\u0100' @@ -365,10 +381,10 @@ def test_asutf8(self): self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf') self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc') - # Test PyUnicode_AsUTF8AndSize() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8andsize(self): + """Test PyUnicode_AsUTF8AndSize()""" from _testcapi import unicode_asutf8andsize bmp = '\u0100' @@ -380,54 +396,277 @@ def test_asutf8andsize(self): self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4)) self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc') - # Test PyUnicode_Count() + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_concat(self): + """Test PyUnicode_Concat()""" + from _testcapi import unicode_concat as concat + + self.assertEqual(concat('abc', 'def'), 'abcdef') + self.assertEqual(concat('abc', 'где'), 'abcгде') + self.assertEqual(concat('абв', 'def'), 'абвdef') + self.assertEqual(concat('абв', 'где'), 'абвгде') + self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d') + + self.assertRaises(TypeError, concat, b'abc', 'def') + self.assertRaises(TypeError, concat, 'abc', b'def') + self.assertRaises(TypeError, concat, b'abc', b'def') + self.assertRaises(TypeError, concat, [], 'def') + self.assertRaises(TypeError, concat, 'abc', []) + self.assertRaises(TypeError, concat, [], []) + # CRASHES concat(NULL, 'def') + # CRASHES concat('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_split(self): + """Test PyUnicode_Split()""" + from _testcapi import unicode_split as split + + self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d']) + self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d']) + self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(split('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, split, 'a|b|c|d', '') + self.assertRaises(TypeError, split, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, split, [], '|') + # split(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rsplit(self): + """Test PyUnicode_RSplit()""" + from _testcapi import unicode_rsplit as rsplit + + self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '\u20ac'), ['a|b|c|d']) + self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(rsplit('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(rsplit('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(rsplit('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, rsplit, 'a|b|c|d', '') + self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, rsplit, [], '|') + # rsplit(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_partition(self): + """Test PyUnicode_Partition()""" + from _testcapi import unicode_partition as partition + + self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c')) + self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c')) + self.assertEqual(partition('а|б|в', '|'), ('а', '|', 'б|в')) + self.assertEqual(partition('кабан', 'а'), ('к', 'а', 'бан')) + self.assertEqual(partition('aжbжc', 'ж'), ('a', 'ж', 'bжc')) + + self.assertRaises(ValueError, partition, 'a|b|c', '') + self.assertRaises(TypeError, partition, b'a|b|c', '|') + self.assertRaises(TypeError, partition, 'a|b|c', b'|') + self.assertRaises(TypeError, partition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, partition, [], '|') + # CRASHES partition(NULL, '|') + # CRASHES partition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rpartition(self): + """Test PyUnicode_RPartition()""" + from _testcapi import unicode_rpartition as rpartition + + self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c')) + self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c')) + self.assertEqual(rpartition('а|б|в', '|'), ('а|б', '|', 'в')) + self.assertEqual(rpartition('кабан', 'а'), ('каб', 'а', 'н')) + self.assertEqual(rpartition('aжbжc', 'ж'), ('aжb', 'ж', 'c')) + + self.assertRaises(ValueError, rpartition, 'a|b|c', '') + self.assertRaises(TypeError, rpartition, b'a|b|c', '|') + self.assertRaises(TypeError, rpartition, 'a|b|c', b'|') + self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, rpartition, [], '|') + # CRASHES rpartition(NULL, '|') + # CRASHES rpartition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_splitlines(self): + """Test PyUnicode_SplitLines()""" + from _testcapi import unicode_splitlines as splitlines + + self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\nb\rc\r\nd', True), + ['a\n', 'b\r', 'c\r\n', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d'), + ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True), + ['a\x85', 'b\u2028', 'c\u2029', 'd']) + self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г']) + + self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd') + # CRASHES splitlines(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_translate(self): + """Test PyUnicode_Translate()""" + from _testcapi import unicode_translate as translate + + self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d') + self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г') + self.assertEqual(translate('abc', []), 'abc') + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}) + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict') + self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo') + self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac') + self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc') + self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c') + # XXX Other error handlers do not support UnicodeTranslateError + self.assertRaises(TypeError, translate, b'abc', []) + self.assertRaises(TypeError, translate, 123, []) + self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'}) + self.assertRaises(TypeError, translate, 'abc', 123) + self.assertRaises(TypeError, translate, 'abc', NULL) + # CRASHES translate(NULL, []) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_join(self): + """Test PyUnicode_Join()""" + from _testcapi import unicode_join as join + self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c') + self.assertEqual(join('', ['a', 'b', 'c']), 'abc') + self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c') + self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в') + self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв') + self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c']) + self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, '|', b'123') + self.assertRaises(TypeError, join, '|', 123) + self.assertRaises(SystemError, join, '|', NULL) + @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_count(self): + """Test PyUnicode_Count()""" from _testcapi import unicode_count - st = 'abcabd' - self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1) - self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0) # cyrillic "a" + for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": + for i, ch in enumerate(str): + self.assertEqual(unicode_count(str, ch, 0, len(str)), 1) + + str = "!>_= end - self.assertEqual(unicode_count(st, 'abc', 0, 0), 0) - self.assertEqual(unicode_count(st, 'a', 3, 2), 0) - self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0) + self.assertEqual(unicode_count(str, '!', 0, 0), 0) + self.assertEqual(unicode_count(str, '!', len(str), 0), 0) # negative - self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2) - self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1) - # wrong args - self.assertRaises(TypeError, unicode_count, 'a', 'a') - self.assertRaises(TypeError, unicode_count, 'a', 'a', 1) - self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1) - self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1) - # empty string - self.assertEqual(unicode_count('abc', '', 0, 3), 4) - self.assertEqual(unicode_count('abc', '', 1, 3), 3) - self.assertEqual(unicode_count('', '', 0, 1), 1) - self.assertEqual(unicode_count('', 'a', 0, 1), 0) - # different unicode kinds - for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": - for ch in uni: - self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1) - self.assertEqual(unicode_count(st, ch, 0, len(st)), 0) - - # subclasses should still work - class MyStr(str): - pass - - self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2) - - # Test PyUnicode_FindChar() + self.assertEqual(unicode_count(str, '!', -len(str), -1), 1) + # bad arguments + self.assertRaises(TypeError, unicode_count, str, b'!', 0, len(str)) + self.assertRaises(TypeError, unicode_count, b"!>__= end + self.assertEqual(find(str, '!', 0, 0, 1), -1) + self.assertEqual(find(str, '!', len(str), 0, 1), -1) + # negative + self.assertEqual(find(str, '!', -len(str), -1, 1), 0) + self.assertEqual(find(str, '!', -len(str), -1, -1), 0) + # bad arguments + self.assertRaises(TypeError, find, str, b'!', 0, len(str), 1) + self.assertRaises(TypeError, find, b"!>__'), '<>br<>c<>d<>br<>') + self.assertEqual(replace(str, 'abra', '='), '=cad=') + self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra') + self.assertEqual(replace(str, 'a', '=', 0), str) + self.assertEqual(replace(str, 'a', '=', sys.maxsize), '=br=c=d=br=') + self.assertEqual(replace(str, 'z', '='), str) + self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=') + self.assertEqual(replace(str, 'a', 'ж'), 'жbrжcжdжbrж') + self.assertEqual(replace('абабагаламага', 'а', '='), '=б=б=г=л=м=г=') + self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'), 'Baden-Baden') + # bad arguments + self.assertRaises(TypeError, replace, 'a', 'a', b'=') + self.assertRaises(TypeError, replace, 'a', b'a', '=') + self.assertRaises(TypeError, replace, b'a', 'a', '=') + self.assertRaises(TypeError, replace, 'a', 'a', ord('=')) + self.assertRaises(TypeError, replace, 'a', ord('a'), '=') + self.assertRaises(TypeError, replace, [], 'a', '=') + # CRASHES replace('a', 'a', NULL) + # CRASHES replace('a', NULL, '=') + # CRASHES replace(NULL, 'a', '=') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_compare(self): + """Test PyUnicode_Compare()""" + from _testcapi import unicode_compare as compare + + self.assertEqual(compare('abc', 'abc'), 0) + self.assertEqual(compare('abc', 'def'), -1) + self.assertEqual(compare('def', 'abc'), 1) + self.assertEqual(compare('abc', 'abc\0def'), -1) + self.assertEqual(compare('abc\0def', 'abc\0def'), 0) + self.assertEqual(compare('абв', 'abc'), 1) + + self.assertRaises(TypeError, compare, b'abc', 'abc') + self.assertRaises(TypeError, compare, 'abc', b'abc') + self.assertRaises(TypeError, compare, b'abc', b'abc') + self.assertRaises(TypeError, compare, [], 'abc') + self.assertRaises(TypeError, compare, 'abc', []) + self.assertRaises(TypeError, compare, [], []) + # CRASHES compare(NULL, 'abc') + # CRASHES compare('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_comparewithasciistring(self): + """Test PyUnicode_CompareWithASCIIString()""" + from _testcapi import unicode_comparewithasciistring as comparewithasciistring + + self.assertEqual(comparewithasciistring('abc', b'abc'), 0) + self.assertEqual(comparewithasciistring('abc', b'def'), -1) + self.assertEqual(comparewithasciistring('def', b'abc'), 1) + self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0) + self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1) + self.assertEqual(comparewithasciistring('абв', b'abc'), 1) + + # CRASHES comparewithasciistring(b'abc', b'abc') + # CRASHES comparewithasciistring([], b'abc') + # CRASHES comparewithasciistring(NULL, b'abc') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_richcompare(self): + """Test PyUnicode_RichCompare()""" + from _testcapi import unicode_richcompare as richcompare + + LT, LE, EQ, NE, GT, GE = range(6) + strings = ('abc', 'абв', '\U0001f600', 'abc\0') + for s1 in strings: + for s2 in strings: + self.assertIs(richcompare(s1, s2, LT), s1 < s2) + self.assertIs(richcompare(s1, s2, LE), s1 <= s2) + self.assertIs(richcompare(s1, s2, EQ), s1 == s2) + self.assertIs(richcompare(s1, s2, NE), s1 != s2) + self.assertIs(richcompare(s1, s2, GT), s1 > s2) + self.assertIs(richcompare(s1, s2, GE), s1 >= s2) + + for op in LT, LE, EQ, NE, GT, GE: + self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', b'abc', op), NotImplemented) + self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented) + self.assertIs(richcompare([], 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', [], op), NotImplemented) + self.assertIs(richcompare([], [], op), NotImplemented) + + # CRASHES richcompare(NULL, 'abc', op) + # CRASHES richcompare('abc', NULL, op) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_format(self): + """Test PyUnicode_Contains()""" + from _testcapi import unicode_format as format + + self.assertEqual(format('x=%d!', 42), 'x=42!') + self.assertEqual(format('x=%d!', (42,)), 'x=42!') + self.assertEqual(format('x=%d y=%s!', (42, [])), 'x=42 y=[]!') + + self.assertRaises(SystemError, format, 'x=%d!', NULL) + self.assertRaises(SystemError, format, NULL, 42) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_contains(self): + """Test PyUnicode_Contains()""" + from _testcapi import unicode_contains as contains + + self.assertEqual(contains('abcd', ''), 1) + self.assertEqual(contains('abcd', 'b'), 1) + self.assertEqual(contains('abcd', 'x'), 0) + self.assertEqual(contains('abcd', 'ж'), 0) + self.assertEqual(contains('abcd', '\0'), 0) + self.assertEqual(contains('abc\0def', '\0'), 1) + self.assertEqual(contains('abcd', 'bc'), 1) + + self.assertRaises(TypeError, contains, b'abcd', 'b') + self.assertRaises(TypeError, contains, 'abcd', b'b') + self.assertRaises(TypeError, contains, b'abcd', b'b') + self.assertRaises(TypeError, contains, [], 'b') + self.assertRaises(TypeError, contains, 'abcd', ord('b')) + # CRASHES contains(NULL, 'b') + # CRASHES contains('abcd', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_isidentifier(self): + """Test PyUnicode_IsIdentifier()""" + from _testcapi import unicode_isidentifier as isidentifier + + self.assertEqual(isidentifier("a"), 1) + self.assertEqual(isidentifier("b0"), 1) + self.assertEqual(isidentifier("µ"), 1) + self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1) + + self.assertEqual(isidentifier(""), 0) + self.assertEqual(isidentifier(" "), 0) + self.assertEqual(isidentifier("["), 0) + self.assertEqual(isidentifier("©"), 0) + self.assertEqual(isidentifier("0"), 0) + + # CRASHES isidentifier(b"a") + # CRASHES isidentifier([]) + # CRASHES isidentifier(NULL) - # Test PyUnicode_CopyCharacters() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_copycharacters(self): + """Test PyUnicode_CopyCharacters()""" from _testcapi import unicode_copycharacters strings = [ diff --git a/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst new file mode 100644 index 00000000000000..ec4cda2080323f --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst @@ -0,0 +1 @@ +Cover the Unicode C API with tests. diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index d5c4a9e5b95ec6..4c5049dd406a7c 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -1,3 +1,4 @@ +#define PY_SSIZE_T_CLEAN #include "parts.h" static struct PyModuleDef *_testcapimodule = NULL; // set at initialization @@ -99,6 +100,17 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_RETURN_NONE; } +#define NULLABLE(x) do { if (x == Py_None) x = NULL; } while (0); + +/* Test PyUnicode_FromObject() */ +static PyObject * +unicode_fromobject(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_FromObject(arg); +} + +/* Test PyUnicode_AsWideChar() */ static PyObject * unicode_aswidechar(PyObject *self, PyObject *args) { @@ -130,6 +142,7 @@ unicode_aswidechar(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsWideCharString() */ static PyObject * unicode_aswidecharstring(PyObject *self, PyObject *args) { @@ -151,6 +164,7 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsUCS4() */ static PyObject * unicode_asucs4(PyObject *self, PyObject *args) { @@ -181,6 +195,7 @@ unicode_asucs4(PyObject *self, PyObject *args) return result; } +/* Test PyUnicode_AsUTF8() */ static PyObject * unicode_asutf8(PyObject *self, PyObject *args) { @@ -199,6 +214,7 @@ unicode_asutf8(PyObject *self, PyObject *args) return PyBytes_FromString(buffer); } +/* Test PyUnicode_AsUTF8AndSize() */ static PyObject * unicode_asutf8andsize(PyObject *self, PyObject *args) { @@ -223,26 +239,194 @@ unicode_asutf8andsize(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, utf8_len); } +/* Test PyUnicode_Concat() */ +static PyObject * +unicode_concat(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_Concat(left, right); +} + +/* Test PyUnicode_Split() */ +static PyObject * +unicode_split(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Split(s, sep, maxsplit); +} + +/* Test PyUnicode_RSplit() */ +static PyObject * +unicode_rsplit(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RSplit(s, sep, maxsplit); +} + +/* Test PyUnicode_Splitlines() */ +static PyObject * +unicode_splitlines(PyObject *self, PyObject *args) +{ + PyObject *s; + int keepends = 0; + + if (!PyArg_ParseTuple(args, "O|i", &s, &keepends)) + return NULL; + + NULLABLE(s); + return PyUnicode_Splitlines(s, keepends); +} + +/* Test PyUnicode_Partition() */ +static PyObject * +unicode_partition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Partition(s, sep); +} + +/* Test PyUnicode_RPartition() */ +static PyObject * +unicode_rpartition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RPartition(s, sep); +} + +/* Test PyUnicode_Translate() */ +static PyObject * +unicode_translate(PyObject *self, PyObject *args) +{ + PyObject *obj; + PyObject *table; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "OO|z", &obj, &table, &errors)) + return NULL; + + NULLABLE(obj); + NULLABLE(table); + return PyUnicode_Translate(obj, table, errors); +} + +/* Test PyUnicode_Join() */ +static PyObject * +unicode_join(PyObject *self, PyObject *args) +{ + PyObject *sep; + PyObject *seq; + + if (!PyArg_ParseTuple(args, "OO", &sep, &seq)) + return NULL; + + NULLABLE(sep); + NULLABLE(seq); + return PyUnicode_Join(sep, seq); +} + +/* Test PyUnicode_Count() */ static PyObject * unicode_count(PyObject *self, PyObject *args) { PyObject *str; PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; Py_ssize_t result; - Py_ssize_t start, end; - if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr, - &start, &end)) { + if (!PyArg_ParseTuple(args, "OOnn", &str, &substr, &start, &end)) return NULL; - } + NULLABLE(str); + NULLABLE(substr); result = PyUnicode_Count(str, substr, start, end); if (result == -1) return NULL; - else - return PyLong_FromSsize_t(result); + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_Find() */ +static PyObject * +unicode_find(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Find(str, substr, start, end, direction); + if (result == -2) + return NULL; + return PyLong_FromSsize_t(result); } +/* Test PyUnicode_Tailmatch() */ +static PyObject * +unicode_tailmatch(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Tailmatch(str, substr, start, end, direction); + if (result == -1) + return NULL; + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_FindChar() */ static PyObject * unicode_findchar(PyObject *self, PyObject *args) { @@ -264,6 +448,130 @@ unicode_findchar(PyObject *self, PyObject *args) return PyLong_FromSsize_t(result); } +/* Test PyUnicode_Replace() */ +static PyObject * +unicode_replace(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + PyObject *replstr; + Py_ssize_t maxcount = -1; + + if (!PyArg_ParseTuple(args, "OOO|n", &str, &substr, &replstr, &maxcount)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + NULLABLE(replstr); + return PyUnicode_Replace(str, substr, replstr, maxcount); +} + +/* Test PyUnicode_Compare() */ +static PyObject * +unicode_compare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int result; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + result = PyUnicode_Compare(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_CompareWithASCIIString() */ +static PyObject * +unicode_comparewithasciistring(PyObject *self, PyObject *args) +{ + PyObject *left; + const char *right = NULL; + Py_ssize_t right_len; + int result; + + if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len)) + return NULL; + + NULLABLE(left); + result = PyUnicode_CompareWithASCIIString(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_RichCompare() */ +static PyObject * +unicode_richcompare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int op; + + if (!PyArg_ParseTuple(args, "OOi", &left, &right, &op)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_RichCompare(left, right, op); +} + +/* Test PyUnicode_Format() */ +static PyObject * +unicode_format(PyObject *self, PyObject *args) +{ + PyObject *format; + PyObject *fargs; + + if (!PyArg_ParseTuple(args, "OO", &format, &fargs)) + return NULL; + + NULLABLE(format); + NULLABLE(fargs); + return PyUnicode_Format(format, fargs); +} + +/* Test PyUnicode_Contains() */ +static PyObject * +unicode_contains(PyObject *self, PyObject *args) +{ + PyObject *container; + PyObject *element; + int result; + + if (!PyArg_ParseTuple(args, "OO", &container, &element)) + return NULL; + + NULLABLE(container); + NULLABLE(element); + result = PyUnicode_Contains(container, element); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_IsIdentifier() */ +static PyObject * +unicode_isidentifier(PyObject *self, PyObject *arg) +{ + int result; + + NULLABLE(arg); + result = PyUnicode_IsIdentifier(arg); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_CopyCharacters() */ static PyObject * unicode_copycharacters(PyObject *self, PyObject *args) { @@ -711,13 +1019,31 @@ static PyMethodDef TestMethods[] = { test_unicode_compare_with_ascii, METH_NOARGS}, {"test_string_from_format", test_string_from_format, METH_NOARGS}, {"test_widechar", test_widechar, METH_NOARGS}, + {"unicode_fromobject", unicode_fromobject, METH_O}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring", unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, + {"unicode_concat", unicode_concat, METH_VARARGS}, + {"unicode_splitlines", unicode_splitlines, METH_VARARGS}, + {"unicode_split", unicode_split, METH_VARARGS}, + {"unicode_rsplit", unicode_rsplit, METH_VARARGS}, + {"unicode_partition", unicode_partition, METH_VARARGS}, + {"unicode_rpartition", unicode_rpartition, METH_VARARGS}, + {"unicode_translate", unicode_translate, METH_VARARGS}, + {"unicode_join", unicode_join, METH_VARARGS}, {"unicode_count", unicode_count, METH_VARARGS}, + {"unicode_tailmatch", unicode_tailmatch, METH_VARARGS}, + {"unicode_find", unicode_find, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, + {"unicode_replace", unicode_replace, METH_VARARGS}, + {"unicode_compare", unicode_compare, METH_VARARGS}, + {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS}, + {"unicode_richcompare", unicode_richcompare, METH_VARARGS}, + {"unicode_format", unicode_format, METH_VARARGS}, + {"unicode_contains", unicode_contains, METH_VARARGS}, + {"unicode_isidentifier", unicode_isidentifier, METH_O}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {NULL}, }; From 545400a95a6314192f4f8b9487c2c699d0f18db7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 27 Nov 2022 09:52:51 +0200 Subject: [PATCH 2/2] Address review comments. --- Lib/test/test_capi/test_unicode.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index f09a8fc22e3d5c..857579f758386f 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -11,6 +11,10 @@ NULL = None +class Str(str): + pass + + class CAPITest(unittest.TestCase): @support.cpython_only @@ -22,6 +26,11 @@ def test_fromobject(self): for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', 'a\ud800b\udfffc', '\ud834\udd1e']: self.assertEqual(fromobject(s), s) + o = Str(s) + s2 = fromobject(o) + self.assertEqual(s2, s) + self.assertIs(type(s2), str) + self.assertIsNot(s2, s) self.assertRaises(TypeError, fromobject, b'abc') self.assertRaises(TypeError, fromobject, []) @@ -438,7 +447,7 @@ def test_split(self): self.assertRaises(ValueError, split, 'a|b|c|d', '') self.assertRaises(TypeError, split, 'a|b|c|d', ord('|')) self.assertRaises(TypeError, split, [], '|') - # split(NULL, '|') + # CRASHES split(NULL, '|') @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') @@ -462,7 +471,7 @@ def test_rsplit(self): self.assertRaises(ValueError, rsplit, 'a|b|c|d', '') self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|')) self.assertRaises(TypeError, rsplit, [], '|') - # rsplit(NULL, '|') + # CRASHES rsplit(NULL, '|') @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') @@ -530,6 +539,7 @@ def test_translate(self): self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d') self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г') + self.assertEqual(translate('abc', {}), 'abc') self.assertEqual(translate('abc', []), 'abc') self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}) self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict') @@ -543,6 +553,7 @@ def test_translate(self): self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'}) self.assertRaises(TypeError, translate, 'abc', 123) self.assertRaises(TypeError, translate, 'abc', NULL) + self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo') # CRASHES translate(NULL, []) @support.cpython_only @@ -551,6 +562,7 @@ def test_join(self): """Test PyUnicode_Join()""" from _testcapi import unicode_join as join self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c') + self.assertEqual(join('|', ['a', '', 'c']), 'a||c') self.assertEqual(join('', ['a', 'b', 'c']), 'abc') self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c') self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в') @@ -596,11 +608,6 @@ def test_tailmatch(self): """Test PyUnicode_Tailmatch()""" from _testcapi import unicode_tailmatch as tailmatch - #for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": - #for i, ch in enumerate(str): - #self.assertEqual(tailmatch(str, ch, 0, len(str), 1), i) - #self.assertEqual(tailmatch(str, ch, 0, len(str), -1), i) - str = 'ababahalamaha' self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1) self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1) @@ -790,7 +797,7 @@ def test_richcompare(self): @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_format(self): - """Test PyUnicode_Contains()""" + """Test PyUnicode_Format()""" from _testcapi import unicode_format as format self.assertEqual(format('x=%d!', 42), 'x=42!') @@ -838,6 +845,7 @@ def test_isidentifier(self): self.assertEqual(isidentifier("["), 0) self.assertEqual(isidentifier("©"), 0) self.assertEqual(isidentifier("0"), 0) + self.assertEqual(isidentifier("32M"), 0) # CRASHES isidentifier(b"a") # CRASHES isidentifier([]) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy