Skip to content

gh-99593: Add tests for Unicode C API (part 1) #99651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 29, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Address review comments.
  • Loading branch information
serhiy-storchaka committed Nov 27, 2022
commit 545400a95a6314192f4f8b9487c2c699d0f18db7
24 changes: 16 additions & 8 deletions Lib/test/test_capi/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@

NULL = None

# Minimal str subclass with no added behavior; test_fromobject passes Str
# instances to fromobject() and checks that the result has exact type str.
class Str(str):
pass


class CAPITest(unittest.TestCase):

@support.cpython_only
Expand All @@ -22,6 +26,11 @@ def test_fromobject(self):
for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
'a\ud800b\udfffc', '\ud834\udd1e']:
self.assertEqual(fromobject(s), s)
o = Str(s)
s2 = fromobject(o)
self.assertEqual(s2, s)
self.assertIs(type(s2), str)
self.assertIsNot(s2, s)

self.assertRaises(TypeError, fromobject, b'abc')
self.assertRaises(TypeError, fromobject, [])
Expand Down Expand Up @@ -438,7 +447,7 @@ def test_split(self):
self.assertRaises(ValueError, split, 'a|b|c|d', '')
self.assertRaises(TypeError, split, 'a|b|c|d', ord('|'))
self.assertRaises(TypeError, split, [], '|')
# split(NULL, '|')
# CRASHES split(NULL, '|')

@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
Expand All @@ -462,7 +471,7 @@ def test_rsplit(self):
self.assertRaises(ValueError, rsplit, 'a|b|c|d', '')
self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|'))
self.assertRaises(TypeError, rsplit, [], '|')
# rsplit(NULL, '|')
# CRASHES rsplit(NULL, '|')

@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
Expand Down Expand Up @@ -530,6 +539,7 @@ def test_translate(self):

self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d')
self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г')
self.assertEqual(translate('abc', {}), 'abc')
self.assertEqual(translate('abc', []), 'abc')
self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand. None is supposed to delete the "b" character: https://docs.python.org/dev/library/stdtypes.html#text-sequence-type-str

The mapping table must map Unicode ordinal integers to Unicode ordinal integers or None (causing deletion of the character).

Is the doc wrong?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The doc is wrong.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. The surprising part is that str.translate() treats None as "delete":

>>> "abc".translate(str.maketrans({'b': None}))
'ac'

Well, it would be nice to update the doc (maybe in a separate PR).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because str.translate calls PyUnicode_Translate() with the error handler "ignore".

self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict')
Expand All @@ -543,6 +553,7 @@ def test_translate(self):
self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'})
self.assertRaises(TypeError, translate, 'abc', 123)
self.assertRaises(TypeError, translate, 'abc', NULL)
self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo')
# CRASHES translate(NULL, [])

@support.cpython_only
Expand All @@ -551,6 +562,7 @@ def test_join(self):
"""Test PyUnicode_Join()"""
from _testcapi import unicode_join as join
self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c')
self.assertEqual(join('|', ['a', '', 'c']), 'a||c')
self.assertEqual(join('', ['a', 'b', 'c']), 'abc')
self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c')
self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в')
Expand Down Expand Up @@ -596,11 +608,6 @@ def test_tailmatch(self):
"""Test PyUnicode_Tailmatch()"""
from _testcapi import unicode_tailmatch as tailmatch

#for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
#for i, ch in enumerate(str):
#self.assertEqual(tailmatch(str, ch, 0, len(str), 1), i)
#self.assertEqual(tailmatch(str, ch, 0, len(str), -1), i)

str = 'ababahalamaha'
self.assertEqual(tailmatch(str, 'aba', 0, len(str), -1), 1)
self.assertEqual(tailmatch(str, 'aha', 0, len(str), 1), 1)
Expand Down Expand Up @@ -790,7 +797,7 @@ def test_richcompare(self):
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
def test_format(self):
"""Test PyUnicode_Contains()"""
"""Test PyUnicode_Format()"""
from _testcapi import unicode_format as format

self.assertEqual(format('x=%d!', 42), 'x=42!')
Expand Down Expand Up @@ -838,6 +845,7 @@ def test_isidentifier(self):
self.assertEqual(isidentifier("["), 0)
self.assertEqual(isidentifier("©"), 0)
self.assertEqual(isidentifier("0"), 0)
self.assertEqual(isidentifier("32M"), 0)

# CRASHES isidentifier(b"a")
# CRASHES isidentifier([])
Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy