From 7f5362fa458d87f7d1a80e6ae5d63b017533a17f Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Mon, 13 Aug 2018 12:51:09 +0300
Subject: [PATCH 1/2] gh-99593: Add tests for Unicode C API (part 1)

Add tests for functions corresponding to the str class methods.
---
 Lib/test/test_capi/test_unicode.py            | 483 ++++++++++++++++--
 ...2-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst |   1 +
 Modules/_testcapi/unicode.c                   | 338 +++++++++++-
 3 files changed, 772 insertions(+), 50 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 07b77d3e04bbe0..f09a8fc22e3d5c 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -9,10 +9,26 @@
     _testcapi = None
 
 
+NULL = None
+
 class CAPITest(unittest.TestCase):
 
-    # Test PyUnicode_FromFormat()
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_fromobject(self):
+        """Test PyUnicode_FromObject()"""
+        from _testcapi import unicode_fromobject as fromobject
+
+        for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
+                  'a\ud800b\udfffc', '\ud834\udd1e']:
+            self.assertEqual(fromobject(s), s)
+
+        self.assertRaises(TypeError, fromobject, b'abc')
+        self.assertRaises(TypeError, fromobject, [])
+        # CRASHES fromobject(NULL)
+
     def test_from_format(self):
+        """Test PyUnicode_FromFormat()"""
         import_helper.import_module('ctypes')
         from ctypes import (
             c_char_p,
@@ -268,10 +284,10 @@ def check_format(expected, format, *args):
         self.assertRaisesRegex(SystemError, 'invalid format string',
             PyUnicode_FromFormat, b'%+i', c_int(10))
 
-    # Test PyUnicode_AsWideChar()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_aswidechar(self):
+        """Test PyUnicode_AsWideChar()"""
         from _testcapi import unicode_aswidechar
         import_helper.import_module('ctypes')
         from ctypes import c_wchar, sizeof
@@ -307,10 +323,10 @@ def test_aswidechar(self):
         self.assertEqual(size, nchar)
         self.assertEqual(wchar, nonbmp + '\0')
 
-    # Test PyUnicode_AsWideCharString()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_aswidecharstring(self):
+        """Test PyUnicode_AsWideCharString()"""
         from _testcapi import unicode_aswidecharstring
         import_helper.import_module('ctypes')
         from ctypes import c_wchar, sizeof
@@ -332,10 +348,10 @@ def test_aswidecharstring(self):
         self.assertEqual(size, nchar)
         self.assertEqual(wchar, nonbmp + '\0')
 
-    # Test PyUnicode_AsUCS4()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asucs4(self):
+        """Test PyUnicode_AsUCS4()"""
         from _testcapi import unicode_asucs4
         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
                   'a\ud800b\udfffc', '\ud834\udd1e']:
@@ -350,10 +366,10 @@ def test_asucs4(self):
             self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0')
             self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff')
 
-    # Test PyUnicode_AsUTF8()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asutf8(self):
+        """Test PyUnicode_AsUTF8()"""
         from _testcapi import unicode_asutf8
 
         bmp = '\u0100'
@@ -365,10 +381,10 @@ def test_asutf8(self):
         self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf')
         self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc')
 
-    # Test PyUnicode_AsUTF8AndSize()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_asutf8andsize(self):
+        """Test PyUnicode_AsUTF8AndSize()"""
         from _testcapi import unicode_asutf8andsize
 
         bmp = '\u0100'
@@ -380,54 +396,277 @@ def test_asutf8andsize(self):
         self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
         self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
 
-    # Test PyUnicode_Count()
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_concat(self):
+        """Test PyUnicode_Concat()"""
+        from _testcapi import unicode_concat as concat
+
+        self.assertEqual(concat('abc', 'def'), 'abcdef')
+        self.assertEqual(concat('abc', 'где'), 'abcгде')
+        self.assertEqual(concat('абв', 'def'), 'абвdef')
+        self.assertEqual(concat('абв', 'где'), 'абвгде')
+        self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d')
+
+        self.assertRaises(TypeError, concat, b'abc', 'def')
+        self.assertRaises(TypeError, concat, 'abc', b'def')
+        self.assertRaises(TypeError, concat, b'abc', b'def')
+        self.assertRaises(TypeError, concat, [], 'def')
+        self.assertRaises(TypeError, concat, 'abc', [])
+        self.assertRaises(TypeError, concat, [], [])
+        # CRASHES concat(NULL, 'def')
+        # CRASHES concat('abc', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_split(self):
+        """Test PyUnicode_Split()"""
+        from _testcapi import unicode_split as split
+
+        self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d'])
+        self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d'])
+        self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd'])
+        self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г'])
+        self.assertEqual(split('абабагаламага', 'а'),
+                         ['', 'б', 'б', 'г', 'л', 'м', 'г', ''])
+        self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+        self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+
+        self.assertRaises(ValueError, split, 'a|b|c|d', '')
+        self.assertRaises(TypeError, split, 'a|b|c|d', ord('|'))
+        self.assertRaises(TypeError, split, [], '|')
+        # split(NULL, '|')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_rsplit(self):
+        """Test PyUnicode_RSplit()"""
+        from _testcapi import unicode_rsplit as rsplit
+
+        self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd'])
+        self.assertEqual(rsplit('a|b|c|d', '\u20ac'), ['a|b|c|d'])
+        self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd'])
+        self.assertEqual(rsplit('а|б|в|г', '|'), ['а', 'б', 'в', 'г'])
+        self.assertEqual(rsplit('абабагаламага', 'а'),
+                         ['', 'б', 'б', 'г', 'л', 'м', 'г', ''])
+        self.assertEqual(rsplit('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+        self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL),
+                         ['a', 'b', 'c', 'd', 'e'])
+
+        self.assertRaises(ValueError, rsplit, 'a|b|c|d', '')
+        self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|'))
+        self.assertRaises(TypeError, rsplit, [], '|')
+        # rsplit(NULL, '|')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_partition(self):
+        """Test PyUnicode_Partition()"""
+        from _testcapi import unicode_partition as partition
+
+        self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c'))
+        self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c'))
+        self.assertEqual(partition('а|б|в', '|'), ('а', '|', 'б|в'))
+        self.assertEqual(partition('кабан', 'а'), ('к', 'а', 'бан'))
+        self.assertEqual(partition('aжbжc', 'ж'), ('a', 'ж', 'bжc'))
+
+        self.assertRaises(ValueError, partition, 'a|b|c', '')
+        self.assertRaises(TypeError, partition, b'a|b|c', '|')
+        self.assertRaises(TypeError, partition, 'a|b|c', b'|')
+        self.assertRaises(TypeError, partition, 'a|b|c', ord('|'))
+        self.assertRaises(TypeError, partition, [], '|')
+        # CRASHES partition(NULL, '|')
+        # CRASHES partition('a|b|c', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_rpartition(self):
+        """Test PyUnicode_RPartition()"""
+        from _testcapi import unicode_rpartition as rpartition
+
+        self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c'))
+        self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c'))
+        self.assertEqual(rpartition('а|б|в', '|'), ('а|б', '|', 'в'))
+        self.assertEqual(rpartition('кабан', 'а'), ('каб', 'а', 'н'))
+        self.assertEqual(rpartition('aжbжc', 'ж'), ('aжb', 'ж', 'c'))
+
+        self.assertRaises(ValueError, rpartition, 'a|b|c', '')
+        self.assertRaises(TypeError, rpartition, b'a|b|c', '|')
+        self.assertRaises(TypeError, rpartition, 'a|b|c', b'|')
+        self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|'))
+        self.assertRaises(TypeError, rpartition, [], '|')
+        # CRASHES rpartition(NULL, '|')
+        # CRASHES rpartition('a|b|c', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_splitlines(self):
+        """Test PyUnicode_Splitlines()"""
+        from _testcapi import unicode_splitlines as splitlines
+
+        self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd'])
+        self.assertEqual(splitlines('a\nb\rc\r\nd', True),
+                         ['a\n', 'b\r', 'c\r\n', 'd'])
+        self.assertEqual(splitlines('a\x85b\u2028c\u2029d'),
+                         ['a', 'b', 'c', 'd'])
+        self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True),
+                         ['a\x85', 'b\u2028', 'c\u2029', 'd'])
+        self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г'])
+
+        self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd')
+        # CRASHES splitlines(NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_translate(self):
+        """Test PyUnicode_Translate()"""
+        from _testcapi import unicode_translate as translate
+
+        self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
+        self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
+        self.assertEqual(translate('abc', []), 'abc')
+        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
+        self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
+        self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
+        self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac')
+        self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc')
+        self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c')
+        # XXX Other error handlers do not support UnicodeTranslateError
+        self.assertRaises(TypeError, translate, b'abc', [])
+        self.assertRaises(TypeError, translate, 123, [])
+        self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
+        self.assertRaises(TypeError, translate, 'abc', 123)
+        self.assertRaises(TypeError, translate, 'abc', NULL)
+        # CRASHES translate(NULL, [])
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_join(self):
+        """Test PyUnicode_Join()"""
+        from _testcapi import unicode_join as join
+        self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c')
+        self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
+        self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
+        self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в')
+        self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв')
+        self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c'])
+        self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c'])
+        self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c'])
+        self.assertRaises(TypeError, join, '|', b'123')
+        self.assertRaises(TypeError, join, '|', 123)
+        self.assertRaises(SystemError, join, '|', NULL)
+
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_count(self):
+        """Test PyUnicode_Count()"""
         from _testcapi import unicode_count
 
-        st = 'abcabd'
-        self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2)
-        self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2)
-        self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1)
-        self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0)  # cyrillic "a"
+        for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            for ch in str:
+                self.assertEqual(unicode_count(str, ch, 0, len(str)), 1)
+
+        str = "!>_<!"
+        self.assertEqual(unicode_count(str, 'z', 0, len(str)), 0)
+        self.assertEqual(unicode_count(str, '', 0, len(str)), len(str)+1)
         # start < end
-        self.assertEqual(unicode_count(st, 'a', 3, len(st)), 1)
-        self.assertEqual(unicode_count(st, 'a', 4, len(st)), 0)
-        self.assertEqual(unicode_count(st, 'a', 0, sys.maxsize), 2)
+        self.assertEqual(unicode_count(str, '!', 1, len(str)+1), 1)
         # start >= end
-        self.assertEqual(unicode_count(st, 'abc', 0, 0), 0)
-        self.assertEqual(unicode_count(st, 'a', 3, 2), 0)
-        self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0)
+        self.assertEqual(unicode_count(str, '!', 0, 0), 0)
+        self.assertEqual(unicode_count(str, '!', len(str), 0), 0)
         # negative
-        self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2)
-        self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1)
-        # wrong args
-        self.assertRaises(TypeError, unicode_count, 'a', 'a')
-        self.assertRaises(TypeError, unicode_count, 'a', 'a', 1)
-        self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1)
-        self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1)
-        # empty string
-        self.assertEqual(unicode_count('abc', '', 0, 3), 4)
-        self.assertEqual(unicode_count('abc', '', 1, 3), 3)
-        self.assertEqual(unicode_count('', '', 0, 1), 1)
-        self.assertEqual(unicode_count('', 'a', 0, 1), 0)
-        # different unicode kinds
-        for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
-            for ch in uni:
-                self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1)
-                self.assertEqual(unicode_count(st, ch, 0, len(st)), 0)
-
-        # subclasses should still work
-        class MyStr(str):
-            pass
-
-        self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2)
-
-    # Test PyUnicode_FindChar()
+        self.assertEqual(unicode_count(str, '!', -len(str), -1), 1)
+        # bad arguments (non-str operands must be rejected by PyUnicode_Count itself)
+        self.assertRaises(TypeError, unicode_count, str, b'!', 0, len(str))
+        self.assertRaises(TypeError, unicode_count, b"!>_<!", '!', 0, len(str))
+        self.assertRaises(TypeError, unicode_count, str, ord('!'), 0, len(str))
+        self.assertRaises(TypeError, unicode_count, [], '!', 0, len(str))
+        # CRASHES unicode_count(NULL, '!', 0, len(str))
+        # CRASHES unicode_count(str, NULL, 0, len(str))
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_tailmatch(self):
+        """Test PyUnicode_Tailmatch()"""
+        from _testcapi import unicode_tailmatch as tailmatch
+
+        #for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            #for i, ch in enumerate(str):
+                #self.assertEqual(tailmatch(str, ch, 0, len(str), 1), i)
+                #self.assertEqual(tailmatch(str, ch, 0, len(str), -1), i)
+
+        str = 'ababahalamaha'
+        self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1)
+        self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1)
+
+        self.assertEqual(tailmatch(str, 'aba', 0, sys.maxsize, -1), 1)
+        self.assertEqual(tailmatch(str, 'aba', -len(str), sys.maxsize, -1), 1)
+        self.assertEqual(tailmatch(str, 'aba', -sys.maxsize-1, len(str), -1), 1)
+        self.assertEqual(tailmatch(str, 'aha', 0, sys.maxsize, 1), 1)
+        self.assertEqual(tailmatch(str, 'aha', -sys.maxsize-1, len(str), 1), 1)
+
+        self.assertEqual(tailmatch(str, 'z', 0, len(str), 1), 0)
+        self.assertEqual(tailmatch(str, 'z', 0, len(str), -1), 0)
+        self.assertEqual(tailmatch(str, '', 0, len(str), 1), 1)
+        self.assertEqual(tailmatch(str, '', 0, len(str), -1), 1)
+
+        self.assertEqual(tailmatch(str, 'ba', 0, len(str)-1, -1), 0)
+        self.assertEqual(tailmatch(str, 'ba', 1, len(str)-1, -1), 1)
+        self.assertEqual(tailmatch(str, 'aba', 1, len(str)-1, -1), 0)
+        self.assertEqual(tailmatch(str, 'ba', -len(str)+1, -1, -1), 1)
+        self.assertEqual(tailmatch(str, 'ah', 0, len(str), 1), 0)
+        self.assertEqual(tailmatch(str, 'ah', 0, len(str)-1, 1), 1)
+        self.assertEqual(tailmatch(str, 'ah', -len(str), -1, 1), 1)
+
+        # bad arguments
+        self.assertRaises(TypeError, tailmatch, str, ('aba', 'aha'), 0, len(str), -1)
+        self.assertRaises(TypeError, tailmatch, str, ('aba', 'aha'), 0, len(str), 1)
+        # CRASHES tailmatch(NULL, 'aba', 0, len(str), -1)
+        # CRASHES tailmatch(str, NULL, 0, len(str), -1)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_find(self):
+        """Test PyUnicode_Find()"""
+        from _testcapi import unicode_find as find
+
+        for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+            for i, ch in enumerate(str):
+                self.assertEqual(find(str, ch, 0, len(str), 1), i)
+                self.assertEqual(find(str, ch, 0, len(str), -1), i)
+
+        str = "!>_<!"
+        self.assertEqual(find(str, 'z', 0, len(str), 1), -1)
+        self.assertEqual(find(str, 'z', 0, len(str), -1), -1)
+        self.assertEqual(find(str, '', 0, len(str), 1), 0)
+        self.assertEqual(find(str, '', 0, len(str), -1), len(str))
+        # start < end
+        self.assertEqual(find(str, '!', 1, len(str)+1, 1), 4)
+        self.assertEqual(find(str, '!', 1, len(str)+1, -1), 4)
+        # start >= end
+        self.assertEqual(find(str, '!', 0, 0, 1), -1)
+        self.assertEqual(find(str, '!', len(str), 0, 1), -1)
+        # negative
+        self.assertEqual(find(str, '!', -len(str), -1, 1), 0)
+        self.assertEqual(find(str, '!', -len(str), -1, -1), 0)
+        # bad arguments
+        self.assertRaises(TypeError, find, str, b'!', 0, len(str), 1)
+        self.assertRaises(TypeError, find, b"!>_<!", '!', 0, len(str), 1)
+        self.assertRaises(TypeError, find, str, ord('!'), 0, len(str), 1)
+        self.assertRaises(TypeError, find, [], '!', 0, len(str), 1)
+        # CRASHES find(NULL, '!', 0, len(str), 1)
+        # CRASHES find(str, NULL, 0, len(str), 1)
+
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_findchar(self):
+        """Test PyUnicode_FindChar()"""
         from _testcapi import unicode_findchar
 
         for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
@@ -447,11 +686,167 @@ def test_findchar(self):
         # negative
         self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
         self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
+        # bad arguments
+        # CRASHES unicode_findchar(b"!>_<!", ord('!'), 0, len(str), 1)
+        # CRASHES unicode_findchar([], ord('!'), 0, len(str), 1)
+        # CRASHES unicode_findchar(NULL, ord('!'), 0, len(str), 1)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_replace(self):
+        """Test PyUnicode_Replace()"""
+        from _testcapi import unicode_replace as replace
+
+        str = 'abracadabra'
+        self.assertEqual(replace(str, 'a', '='), '=br=c=d=br=')
+        self.assertEqual(replace(str, 'a', '<>'), '<>br<>c<>d<>br<>')
+        self.assertEqual(replace(str, 'abra', '='), '=cad=')
+        self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra')
+        self.assertEqual(replace(str, 'a', '=', 0), str)
+        self.assertEqual(replace(str, 'a', '=', sys.maxsize), '=br=c=d=br=')
+        self.assertEqual(replace(str, 'z', '='), str)
+        self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=')
+        self.assertEqual(replace(str, 'a', 'ж'), 'жbrжcжdжbrж')
+        self.assertEqual(replace('абабагаламага', 'а', '='), '=б=б=г=л=м=г=')
+        self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'), 'Baden-Baden')
+        # bad arguments
+        self.assertRaises(TypeError, replace, 'a', 'a', b'=')
+        self.assertRaises(TypeError, replace, 'a', b'a', '=')
+        self.assertRaises(TypeError, replace, b'a', 'a', '=')
+        self.assertRaises(TypeError, replace, 'a', 'a', ord('='))
+        self.assertRaises(TypeError, replace, 'a', ord('a'), '=')
+        self.assertRaises(TypeError, replace, [], 'a', '=')
+        # CRASHES replace('a', 'a', NULL)
+        # CRASHES replace('a', NULL, '=')
+        # CRASHES replace(NULL, 'a', '=')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_compare(self):
+        """Test PyUnicode_Compare()"""
+        from _testcapi import unicode_compare as compare
+
+        self.assertEqual(compare('abc', 'abc'), 0)
+        self.assertEqual(compare('abc', 'def'), -1)
+        self.assertEqual(compare('def', 'abc'), 1)
+        self.assertEqual(compare('abc', 'abc\0def'), -1)
+        self.assertEqual(compare('abc\0def', 'abc\0def'), 0)
+        self.assertEqual(compare('абв', 'abc'), 1)
+
+        self.assertRaises(TypeError, compare, b'abc', 'abc')
+        self.assertRaises(TypeError, compare, 'abc', b'abc')
+        self.assertRaises(TypeError, compare, b'abc', b'abc')
+        self.assertRaises(TypeError, compare, [], 'abc')
+        self.assertRaises(TypeError, compare, 'abc', [])
+        self.assertRaises(TypeError, compare, [], [])
+        # CRASHES compare(NULL, 'abc')
+        # CRASHES compare('abc', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_comparewithasciistring(self):
+        """Test PyUnicode_CompareWithASCIIString()"""
+        from _testcapi import unicode_comparewithasciistring as comparewithasciistring
+
+        self.assertEqual(comparewithasciistring('abc', b'abc'), 0)
+        self.assertEqual(comparewithasciistring('abc', b'def'), -1)
+        self.assertEqual(comparewithasciistring('def', b'abc'), 1)
+        self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0)
+        self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1)
+        self.assertEqual(comparewithasciistring('абв', b'abc'), 1)
+
+        # CRASHES comparewithasciistring(b'abc', b'abc')
+        # CRASHES comparewithasciistring([], b'abc')
+        # CRASHES comparewithasciistring(NULL, b'abc')
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_richcompare(self):
+        """Test PyUnicode_RichCompare()"""
+        from _testcapi import unicode_richcompare as richcompare
+
+        LT, LE, EQ, NE, GT, GE = range(6)
+        strings = ('abc', 'абв', '\U0001f600', 'abc\0')
+        for s1 in strings:
+            for s2 in strings:
+                self.assertIs(richcompare(s1, s2, LT), s1 < s2)
+                self.assertIs(richcompare(s1, s2, LE), s1 <= s2)
+                self.assertIs(richcompare(s1, s2, EQ), s1 == s2)
+                self.assertIs(richcompare(s1, s2, NE), s1 != s2)
+                self.assertIs(richcompare(s1, s2, GT), s1 > s2)
+                self.assertIs(richcompare(s1, s2, GE), s1 >= s2)
+
+        for op in LT, LE, EQ, NE, GT, GE:
+            self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented)
+            self.assertIs(richcompare('abc', b'abc', op), NotImplemented)
+            self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented)
+            self.assertIs(richcompare([], 'abc', op), NotImplemented)
+            self.assertIs(richcompare('abc', [], op), NotImplemented)
+            self.assertIs(richcompare([], [], op), NotImplemented)
+
+            # CRASHES richcompare(NULL, 'abc', op)
+            # CRASHES richcompare('abc', NULL, op)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_format(self):
+        """Test PyUnicode_Format()"""
+        from _testcapi import unicode_format as format
+
+        self.assertEqual(format('x=%d!', 42), 'x=42!')
+        self.assertEqual(format('x=%d!', (42,)), 'x=42!')
+        self.assertEqual(format('x=%d y=%s!', (42, [])), 'x=42 y=[]!')
+
+        self.assertRaises(SystemError, format, 'x=%d!', NULL)
+        self.assertRaises(SystemError, format, NULL, 42)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_contains(self):
+        """Test PyUnicode_Contains()"""
+        from _testcapi import unicode_contains as contains
+
+        self.assertEqual(contains('abcd', ''), 1)
+        self.assertEqual(contains('abcd', 'b'), 1)
+        self.assertEqual(contains('abcd', 'x'), 0)
+        self.assertEqual(contains('abcd', 'ж'), 0)
+        self.assertEqual(contains('abcd', '\0'), 0)
+        self.assertEqual(contains('abc\0def', '\0'), 1)
+        self.assertEqual(contains('abcd', 'bc'), 1)
+
+        self.assertRaises(TypeError, contains, b'abcd', 'b')
+        self.assertRaises(TypeError, contains, 'abcd', b'b')
+        self.assertRaises(TypeError, contains, b'abcd', b'b')
+        self.assertRaises(TypeError, contains, [], 'b')
+        self.assertRaises(TypeError, contains, 'abcd', ord('b'))
+        # CRASHES contains(NULL, 'b')
+        # CRASHES contains('abcd', NULL)
+
+    @support.cpython_only
+    @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+    def test_isidentifier(self):
+        """Test PyUnicode_IsIdentifier()"""
+        from _testcapi import unicode_isidentifier as isidentifier
+
+        self.assertEqual(isidentifier("a"), 1)
+        self.assertEqual(isidentifier("b0"), 1)
+        self.assertEqual(isidentifier("µ"), 1)
+        self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1)
+
+        self.assertEqual(isidentifier(""), 0)
+        self.assertEqual(isidentifier(" "), 0)
+        self.assertEqual(isidentifier("["), 0)
+        self.assertEqual(isidentifier("©"), 0)
+        self.assertEqual(isidentifier("0"), 0)
+
+        # CRASHES isidentifier(b"a")
+        # CRASHES isidentifier([])
+        # CRASHES isidentifier(NULL)
 
-    # Test PyUnicode_CopyCharacters()
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_copycharacters(self):
+        """Test PyUnicode_CopyCharacters()"""
         from _testcapi import unicode_copycharacters
 
         strings = [
diff --git a/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst
new file mode 100644
index 00000000000000..ec4cda2080323f
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst
@@ -0,0 +1 @@
+Cover the Unicode C API with tests.
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index d5c4a9e5b95ec6..4c5049dd406a7c 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -1,3 +1,4 @@
+#define PY_SSIZE_T_CLEAN
 #include "parts.h"
 
 static struct PyModuleDef *_testcapimodule = NULL;  // set at initialization
@@ -99,6 +100,17 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
     Py_RETURN_NONE;
 }
 
+#define NULLABLE(x) do { if ((x) == Py_None) (x) = NULL; } while (0)  /* Py_None stands in for NULL; caller supplies the ';' */
+
+/* Test PyUnicode_FromObject() */
+static PyObject *
+unicode_fromobject(PyObject *self, PyObject *arg)
+{
+    NULLABLE(arg);
+    return PyUnicode_FromObject(arg);
+}
+
+/* Test PyUnicode_AsWideChar() */
 static PyObject *
 unicode_aswidechar(PyObject *self, PyObject *args)
 {
@@ -130,6 +142,7 @@ unicode_aswidechar(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", result, size);
 }
 
+/* Test PyUnicode_AsWideCharString() */
 static PyObject *
 unicode_aswidecharstring(PyObject *self, PyObject *args)
 {
@@ -151,6 +164,7 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", result, size);
 }
 
+/* Test PyUnicode_AsUCS4() */
 static PyObject *
 unicode_asucs4(PyObject *self, PyObject *args)
 {
@@ -181,6 +195,7 @@ unicode_asucs4(PyObject *self, PyObject *args)
     return result;
 }
 
+/* Test PyUnicode_AsUTF8() */
 static PyObject *
 unicode_asutf8(PyObject *self, PyObject *args)
 {
@@ -199,6 +214,7 @@ unicode_asutf8(PyObject *self, PyObject *args)
     return PyBytes_FromString(buffer);
 }
 
+/* Test PyUnicode_AsUTF8AndSize() */
 static PyObject *
 unicode_asutf8andsize(PyObject *self, PyObject *args)
 {
@@ -223,26 +239,194 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", result, utf8_len);
 }
 
+/* Test PyUnicode_Concat(left, right); a None argument is passed as NULL */
+static PyObject *
+unicode_concat(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    PyObject *right;
+
+    if (!PyArg_ParseTuple(args, "OO", &left, &right))  /* any two objects */
+        return NULL;
+
+    NULLABLE(left);
+    NULLABLE(right);
+    return PyUnicode_Concat(left, right);  /* result (or NULL) returned as is */
+}
+
+/* Test PyUnicode_Split(s, sep, maxsplit); None for s or sep becomes NULL */
+static PyObject *
+unicode_split(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+    Py_ssize_t maxsplit = -1;  /* used when the optional 3rd arg is omitted */
+
+    if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit))
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_Split(s, sep, maxsplit);  /* result returned as is */
+}
+
+/* Test PyUnicode_RSplit(s, sep, maxsplit); None for s or sep becomes NULL */
+static PyObject *
+unicode_rsplit(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+    Py_ssize_t maxsplit = -1;  /* used when the optional 3rd arg is omitted */
+
+    if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit))
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_RSplit(s, sep, maxsplit);  /* result returned as is */
+}
+
+/* Test PyUnicode_Splitlines(s, keepends); a None s is passed as NULL */
+static PyObject *
+unicode_splitlines(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    int keepends = 0;  /* used when the optional 2nd arg is omitted */
+
+    if (!PyArg_ParseTuple(args, "O|i", &s, &keepends))
+        return NULL;
+
+    NULLABLE(s);
+    return PyUnicode_Splitlines(s, keepends);  /* result returned as is */
+}
+
+/* Test PyUnicode_Partition(s, sep); a None argument is passed as NULL */
+static PyObject *
+unicode_partition(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+
+    if (!PyArg_ParseTuple(args, "OO", &s, &sep))  /* any two objects */
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_Partition(s, sep);  /* result (or NULL) returned as is */
+}
+
+/* Test PyUnicode_RPartition(s, sep); a None argument is passed as NULL */
+static PyObject *
+unicode_rpartition(PyObject *self, PyObject *args)
+{
+    PyObject *s;
+    PyObject *sep;
+
+    if (!PyArg_ParseTuple(args, "OO", &s, &sep))  /* any two objects */
+        return NULL;
+
+    NULLABLE(s);
+    NULLABLE(sep);
+    return PyUnicode_RPartition(s, sep);  /* result (or NULL) returned as is */
+}
+
+/* Test PyUnicode_Translate(obj, table, errors); None obj/table -> NULL */
+static PyObject *
+unicode_translate(PyObject *self, PyObject *args)
+{
+    PyObject *obj;
+    PyObject *table;
+    const char *errors = NULL;  /* stays NULL when the 3rd arg is omitted */
+
+    if (!PyArg_ParseTuple(args, "OO|z", &obj, &table, &errors))  /* "z": str or None */
+        return NULL;
+
+    NULLABLE(obj);
+    NULLABLE(table);
+    return PyUnicode_Translate(obj, table, errors);  /* result returned as is */
+}
+
+/* Test PyUnicode_Join(sep, seq); a None argument is passed as NULL */
+static PyObject *
+unicode_join(PyObject *self, PyObject *args)
+{
+    PyObject *sep;
+    PyObject *seq;
+
+    if (!PyArg_ParseTuple(args, "OO", &sep, &seq))  /* separator first */
+        return NULL;
+
+    NULLABLE(sep);
+    NULLABLE(seq);
+    return PyUnicode_Join(sep, seq);  /* result (or NULL) returned as is */
+}
+
+/* Test PyUnicode_Count(str, substr, start, end); None str/substr -> NULL */
 static PyObject *
 unicode_count(PyObject *self, PyObject *args)
 {
     PyObject *str;
     PyObject *substr;
+    Py_ssize_t start;
+    Py_ssize_t end;
     Py_ssize_t result;
-    Py_ssize_t start, end;
 
-    if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr,
-                          &start, &end)) {
+    if (!PyArg_ParseTuple(args, "OOnn", &str, &substr, &start, &end))  /* "O", not "U": any object is accepted */
         return NULL;
-    }
 
+    NULLABLE(str);
+    NULLABLE(substr);
     result = PyUnicode_Count(str, substr, start, end);
     if (result == -1)
         return NULL;
-    else
-        return PyLong_FromSsize_t(result);
+    return PyLong_FromSsize_t(result);  /* the count as a Python int */
+}
+
+/* Test PyUnicode_Find(str, substr, start, end, direction); None -> NULL */
+static PyObject *
+unicode_find(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    PyObject *substr;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    int direction;
+    Py_ssize_t result;
+
+    if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction))
+        return NULL;
+
+    NULLABLE(str);
+    NULLABLE(substr);
+    result = PyUnicode_Find(str, substr, start, end, direction);
+    if (result == -2)  /* -2 is the error value; -1 ("not found") is returned */
+        return NULL;
+    return PyLong_FromSsize_t(result);
 }
 
+/* Test PyUnicode_Tailmatch(str, substr, start, end, direction); None -> NULL */
+static PyObject *
+unicode_tailmatch(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    PyObject *substr;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    int direction;
+    Py_ssize_t result;
+
+    if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction))
+        return NULL;
+
+    NULLABLE(str);
+    NULLABLE(substr);
+    result = PyUnicode_Tailmatch(str, substr, start, end, direction);
+    if (result == -1)  /* -1 is treated as the error value */
+        return NULL;
+    return PyLong_FromSsize_t(result);  /* match result as a Python int */
+}
+
+/* Test PyUnicode_FindChar() */
 static PyObject *
 unicode_findchar(PyObject *self, PyObject *args)
 {
@@ -264,6 +448,130 @@ unicode_findchar(PyObject *self, PyObject *args)
         return PyLong_FromSsize_t(result);
 }
 
+/* Test PyUnicode_Replace(str, substr, replstr, maxcount); None -> NULL */
+static PyObject *
+unicode_replace(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    PyObject *substr;
+    PyObject *replstr;
+    Py_ssize_t maxcount = -1;  /* used when the optional 4th arg is omitted */
+
+    if (!PyArg_ParseTuple(args, "OOO|n", &str, &substr, &replstr, &maxcount))
+        return NULL;
+
+    NULLABLE(str);
+    NULLABLE(substr);
+    NULLABLE(replstr);
+    return PyUnicode_Replace(str, substr, replstr, maxcount);  /* returned as is */
+}
+
+/* Test PyUnicode_Compare(left, right); a None argument is passed as NULL */
+static PyObject *
+unicode_compare(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    PyObject *right;
+    int result;
+
+    if (!PyArg_ParseTuple(args, "OO", &left, &right))
+        return NULL;
+
+    NULLABLE(left);
+    NULLABLE(right);
+    result = PyUnicode_Compare(left, right);
+    if (result == -1 && PyErr_Occurred()) {  /* -1 alone is a valid result */
+        return NULL;
+    }
+    return PyLong_FromLong(result);  /* comparison result as a Python int */
+}
+
+/* Test PyUnicode_CompareWithASCIIString(left, right); None left -> NULL */
+static PyObject *
+unicode_comparewithasciistring(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    const char *right = NULL;  /* stays NULL when the bytes arg is omitted */
+    Py_ssize_t right_len;  /* filled by "y#" but never passed on */
+    int result;
+
+    if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len))  /* NOTE(review): "y#" presumably to allow embedded NULs - confirm */
+        return NULL;
+
+    NULLABLE(left);
+    result = PyUnicode_CompareWithASCIIString(left, right);
+    if (result == -1 && PyErr_Occurred()) {  /* -1 alone is a valid result */
+        return NULL;
+    }
+    return PyLong_FromLong(result);  /* comparison result as a Python int */
+}
+
+/* Test PyUnicode_RichCompare(left, right, op); None left/right -> NULL */
+static PyObject *
+unicode_richcompare(PyObject *self, PyObject *args)
+{
+    PyObject *left;
+    PyObject *right;
+    int op;  /* passed through unchecked as the comparison opcode */
+
+    if (!PyArg_ParseTuple(args, "OOi", &left, &right, &op))
+        return NULL;
+
+    NULLABLE(left);
+    NULLABLE(right);
+    return PyUnicode_RichCompare(left, right, op);  /* result returned as is */
+}
+
+/* Test PyUnicode_Format(format, fargs); a None argument is passed as NULL */
+static PyObject *
+unicode_format(PyObject *self, PyObject *args)
+{
+    PyObject *format;
+    PyObject *fargs;
+
+    if (!PyArg_ParseTuple(args, "OO", &format, &fargs))  /* any two objects */
+        return NULL;
+
+    NULLABLE(format);
+    NULLABLE(fargs);
+    return PyUnicode_Format(format, fargs);  /* result (or NULL) returned as is */
+}
+
+/* Test PyUnicode_Contains(container, element); None argument -> NULL */
+static PyObject *
+unicode_contains(PyObject *self, PyObject *args)
+{
+    PyObject *container;
+    PyObject *element;
+    int result;
+
+    if (!PyArg_ParseTuple(args, "OO", &container, &element))
+        return NULL;
+
+    NULLABLE(container);
+    NULLABLE(element);
+    result = PyUnicode_Contains(container, element);
+    if (result == -1 && PyErr_Occurred()) {  /* fail only if an exception is set */
+        return NULL;
+    }
+    return PyLong_FromLong(result);  /* raw int result as a Python int */
+}
+
+/* Test PyUnicode_IsIdentifier(); METH_O wrapper, a None argument -> NULL */
+static PyObject *
+unicode_isidentifier(PyObject *self, PyObject *arg)
+{
+    int result;
+
+    NULLABLE(arg);
+    result = PyUnicode_IsIdentifier(arg);
+    if (result == -1 && PyErr_Occurred()) {  /* fail only if an exception is set */
+        return NULL;
+    }
+    return PyLong_FromLong(result);  /* raw int result as a Python int */
+}
+
+/* Test PyUnicode_CopyCharacters() */
 static PyObject *
 unicode_copycharacters(PyObject *self, PyObject *args)
 {
@@ -711,13 +1019,31 @@ static PyMethodDef TestMethods[] = {
      test_unicode_compare_with_ascii,                            METH_NOARGS},
     {"test_string_from_format",  test_string_from_format,        METH_NOARGS},
     {"test_widechar",            test_widechar,                  METH_NOARGS},
+    {"unicode_fromobject",       unicode_fromobject,             METH_O},
     {"unicode_aswidechar",       unicode_aswidechar,             METH_VARARGS},
     {"unicode_aswidecharstring", unicode_aswidecharstring,       METH_VARARGS},
     {"unicode_asucs4",           unicode_asucs4,                 METH_VARARGS},
     {"unicode_asutf8",           unicode_asutf8,                 METH_VARARGS},
     {"unicode_asutf8andsize",    unicode_asutf8andsize,          METH_VARARGS},
+    {"unicode_concat",           unicode_concat,                 METH_VARARGS},
+    {"unicode_splitlines",       unicode_splitlines,             METH_VARARGS},
+    {"unicode_split",            unicode_split,                  METH_VARARGS},
+    {"unicode_rsplit",           unicode_rsplit,                 METH_VARARGS},
+    {"unicode_partition",        unicode_partition,              METH_VARARGS},
+    {"unicode_rpartition",       unicode_rpartition,             METH_VARARGS},
+    {"unicode_translate",        unicode_translate,              METH_VARARGS},
+    {"unicode_join",             unicode_join,                   METH_VARARGS},
     {"unicode_count",            unicode_count,                  METH_VARARGS},
+    {"unicode_tailmatch",        unicode_tailmatch,              METH_VARARGS},
+    {"unicode_find",             unicode_find,                   METH_VARARGS},
     {"unicode_findchar",         unicode_findchar,               METH_VARARGS},
+    {"unicode_replace",          unicode_replace,                METH_VARARGS},
+    {"unicode_compare",          unicode_compare,                METH_VARARGS},
+    {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS},
+    {"unicode_richcompare",      unicode_richcompare,            METH_VARARGS},
+    {"unicode_format",           unicode_format,                 METH_VARARGS},
+    {"unicode_contains",         unicode_contains,               METH_VARARGS},
+    {"unicode_isidentifier",     unicode_isidentifier,           METH_O},
     {"unicode_copycharacters",   unicode_copycharacters,         METH_VARARGS},
     {NULL},
 };

From 545400a95a6314192f4f8b9487c2c699d0f18db7 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 27 Nov 2022 09:52:51 +0200
Subject: [PATCH 2/2] Address review comments.

---
 Lib/test/test_capi/test_unicode.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index f09a8fc22e3d5c..857579f758386f 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -11,6 +11,10 @@
 
 NULL = None
 
+class Str(str):
+    pass  # bare str subclass; test_fromobject checks it comes back as an exact str
+
+
 class CAPITest(unittest.TestCase):
 
     @support.cpython_only
@@ -22,6 +26,11 @@ def test_fromobject(self):
         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
                   'a\ud800b\udfffc', '\ud834\udd1e']:
             self.assertEqual(fromobject(s), s)
+            o = Str(s)
+            s2 = fromobject(o)
+            self.assertEqual(s2, s)
+            self.assertIs(type(s2), str)
+            self.assertIsNot(s2, s)
 
         self.assertRaises(TypeError, fromobject, b'abc')
         self.assertRaises(TypeError, fromobject, [])
@@ -438,7 +447,7 @@ def test_split(self):
         self.assertRaises(ValueError, split, 'a|b|c|d', '')
         self.assertRaises(TypeError, split, 'a|b|c|d', ord('|'))
         self.assertRaises(TypeError, split, [], '|')
-        # split(NULL, '|')
+        # CRASHES split(NULL, '|')
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -462,7 +471,7 @@ def test_rsplit(self):
         self.assertRaises(ValueError, rsplit, 'a|b|c|d', '')
         self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|'))
         self.assertRaises(TypeError, rsplit, [], '|')
-        # rsplit(NULL, '|')
+        # CRASHES rsplit(NULL, '|')
 
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
@@ -530,6 +539,7 @@ def test_translate(self):
 
         self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
         self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
+        self.assertEqual(translate('abc', {}), 'abc')
         self.assertEqual(translate('abc', []), 'abc')
         self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
         self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
@@ -543,6 +553,7 @@ def test_translate(self):
         self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
         self.assertRaises(TypeError, translate, 'abc', 123)
         self.assertRaises(TypeError, translate, 'abc', NULL)
+        self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
         # CRASHES translate(NULL, [])
 
     @support.cpython_only
@@ -551,6 +562,7 @@ def test_join(self):
         """Test PyUnicode_Join()"""
         from _testcapi import unicode_join as join
         self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c')
+        self.assertEqual(join('|', ['a', '', 'c']), 'a||c')
         self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
         self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
         self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в')
@@ -596,11 +608,6 @@ def test_tailmatch(self):
         """Test PyUnicode_Tailmatch()"""
         from _testcapi import unicode_tailmatch as tailmatch
 
-        #for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
-            #for i, ch in enumerate(str):
-                #self.assertEqual(tailmatch(str, ch, 0, len(str), 1), i)
-                #self.assertEqual(tailmatch(str, ch, 0, len(str), -1), i)
-
         str = 'ababahalamaha'
         self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1)
         self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1)
@@ -790,7 +797,7 @@ def test_richcompare(self):
     @support.cpython_only
     @unittest.skipIf(_testcapi is None, 'need _testcapi module')
     def test_format(self):
-        """Test PyUnicode_Contains()"""
+        """Test PyUnicode_Format()"""
         from _testcapi import unicode_format as format
 
         self.assertEqual(format('x=%d!', 42), 'x=42!')
@@ -838,6 +845,7 @@ def test_isidentifier(self):
         self.assertEqual(isidentifier("["), 0)
         self.assertEqual(isidentifier("©"), 0)
         self.assertEqual(isidentifier("0"), 0)
+        self.assertEqual(isidentifier("32M"), 0)
 
         # CRASHES isidentifier(b"a")
         # CRASHES isidentifier([])

<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>pFad - Phonifier reborn</title>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
</head>
<body>
<h1>Pfad - The Proxy pFad of &#169; 2024 Garber Painting. All rights reserved.</h1>


<!-- Disclaimer -->
<p>Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.</p>
<br>
<p>Alternative Proxies:</p><p><a href="http://rainy.clevelandohioweatherforecast.com/php-proxy/index.php?q=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/99651.patch" target="_blank">Alternative Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/index.php?u=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/99651.patch" target="_blank">pFad Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/v3index.php?u=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/99651.patch" target="_blank">pFad v3 Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/v4index.php?u=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/99651.patch" target="_blank">pFad v4 Proxy</a></p></body>
</html>