Skip to content

Commit 913a956

Browse files
authored
gh-119182: Rewrite PyUnicodeWriter tests in Python (#120845)
1 parent 4123226 commit 913a956

File tree

2 files changed

+336
-262
lines changed

2 files changed

+336
-262
lines changed

Lib/test/test_capi/test_unicode.py

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
import _testinternalcapi
1717
except ImportError:
1818
_testinternalcapi = None
19+
try:
20+
import ctypes
21+
except ImportError:
22+
ctypes = None
1923

2024

2125
NULL = None
@@ -352,13 +356,13 @@ def test_fromobject(self):
352356
self.assertRaises(TypeError, fromobject, [])
353357
# CRASHES fromobject(NULL)
354358

359+
@unittest.skipIf(ctypes is None, 'need ctypes')
355360
def test_from_format(self):
356361
"""Test PyUnicode_FromFormat()"""
357362
# Length modifiers "j" and "t" are not tested here because ctypes does
358363
# not expose types for intmax_t and ptrdiff_t.
359364
# _testlimitedcapi.test_string_from_format() has a wider coverage of all
360365
# formats.
361-
import_helper.import_module('ctypes')
362366
from ctypes import (
363367
c_char_p,
364368
pythonapi, py_object, sizeof,
@@ -1676,5 +1680,149 @@ def test_pep393_utf8_caching_bug(self):
16761680
self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
16771681

16781682

1683+
class PyUnicodeWriterTest(unittest.TestCase):
    """Tests for the PyUnicodeWriter C API.

    The C functions are exercised through the _testcapi.PyUnicodeWriter
    wrapper object; each test writes into a fresh writer and checks the
    string returned by finish().
    """

    def create_writer(self, size):
        """Return a new _testcapi.PyUnicodeWriter created with *size*."""
        return _testcapi.PyUnicodeWriter(size)

    def test_basic(self):
        """Test each basic write method once on a single writer."""
        writer = self.create_writer(100)

        # test PyUnicodeWriter_WriteUTF8()
        writer.write_utf8(b'var', -1)

        # test PyUnicodeWriter_WriteChar()
        writer.write_char('=')

        # test PyUnicodeWriter_WriteSubstring()
        writer.write_substring("[long]", 1, 5)

        # test PyUnicodeWriter_WriteStr()
        writer.write_str(" value ")

        # test PyUnicodeWriter_WriteRepr()
        writer.write_repr("repr")

        self.assertEqual(writer.finish(),
                         "var=long value 'repr'")

    def test_utf8(self):
        """Test write_utf8() with ASCII, 2-byte and 3-byte sequences."""
        writer = self.create_writer(0)
        writer.write_utf8(b"ascii", -1)
        writer.write_char('-')
        writer.write_utf8(b"latin1=\xC3\xA9", -1)
        writer.write_char('-')
        writer.write_utf8(b"euro=\xE2\x82\xAC", -1)
        writer.write_char('.')
        self.assertEqual(writer.finish(),
                         "ascii-latin1=\xE9-euro=\u20AC.")

    def test_invalid_utf8(self):
        """Test that write_utf8() raises UnicodeDecodeError on a 0xFF byte."""
        writer = self.create_writer(0)
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid=\xFF", -1)

    def test_recover_utf8_error(self):
        """Test that a failed write_utf8() leaves earlier content intact."""
        # test recovering from PyUnicodeWriter_WriteUTF8() error
        writer = self.create_writer(0)
        writer.write_utf8(b"value=", -1)

        # write fails with an invalid string
        with self.assertRaises(UnicodeDecodeError):
            writer.write_utf8(b"invalid\xFF", -1)

        # retry write with a valid string
        writer.write_utf8(b"valid", -1)

        # nothing from the failed write ("invalid") appears in the result
        self.assertEqual(writer.finish(),
                         "value=valid")

    def test_decode_utf8(self):
        """Test decodeutf8stateful() with "ignore" and "replace" handlers."""
        # test PyUnicodeWriter_DecodeUTF8Stateful()
        writer = self.create_writer(0)
        writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore")
        writer.write_char('-')
        writer.decodeutf8stateful(b"replace\xFF", -1, b"replace")
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence
        writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace")

        self.assertEqual(writer.finish(),
                         "ignore-replace\uFFFD-incomplete\uFFFD")

    def test_decode_utf8_consumed(self):
        """Test the consumed count returned by decodeutf8stateful().

        The fourth argument (True) asks the wrapper to return the number
        of consumed bytes, which is checked after each call.
        """
        # test PyUnicodeWriter_DecodeUTF8Stateful() with consumed
        writer = self.create_writer(0)

        # valid string
        consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True)
        self.assertEqual(consumed, 4)
        writer.write_char('-')

        # non-ASCII
        consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6, b"strict", True)
        self.assertEqual(consumed, 6)
        writer.write_char('-')

        # invalid UTF-8 (consumed is 0 on error)
        with self.assertRaises(UnicodeDecodeError):
            writer.decodeutf8stateful(b"invalid\xFF", -1, b"strict", True)

        # ignore error handler
        consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True)
        self.assertEqual(consumed, 5)
        writer.write_char('-')

        # incomplete trailing UTF-8 sequence: the lone 0xC3 lead byte is
        # not counted as consumed (10 bytes out of 11)
        consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1, b"ignore", True)
        self.assertEqual(consumed, 10)

        self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete")

    def test_widechar(self):
        """Test write_widechar() with Latin-1 and BMP characters."""
        writer = self.create_writer(0)
        writer.write_widechar("latin1=\xE9")
        writer.write_widechar("-")
        writer.write_widechar("euro=\u20AC")
        writer.write_char('.')
        self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
1789+
1790+
1791+
@unittest.skipIf(ctypes is None, 'need ctypes')
class PyUnicodeWriterFormatTest(unittest.TestCase):
    """Tests for PyUnicodeWriter_Format(), called through ctypes.

    The whole class is skipped when ctypes could not be imported.
    """

    def create_writer(self, size):
        """Return a new _testcapi.PyUnicodeWriter created with *size*."""
        return _testcapi.PyUnicodeWriter(size)

    def writer_format(self, writer, *args):
        """Call PyUnicodeWriter_Format() on *writer* with *args*.

        *args* is the format (bytes) followed by the values to format.
        Raises ValueError if the C function returns a negative value.
        """
        from ctypes import c_char_p, pythonapi, c_int, c_void_p
        _PyUnicodeWriter_Format = pythonapi.PyUnicodeWriter_Format
        # Only the writer pointer and the format are declared; any further
        # arguments in *args are passed beyond the declared argtypes.
        _PyUnicodeWriter_Format.argtypes = (c_void_p, c_char_p,)
        _PyUnicodeWriter_Format.restype = c_int

        if _PyUnicodeWriter_Format(writer.get_pointer(), *args) < 0:
            raise ValueError("PyUnicodeWriter_Format failed")

    def test_format(self):
        """Test formatting a bytes string and a C int into the writer."""
        from ctypes import c_int
        writer = self.create_writer(0)
        self.writer_format(writer, b'%s %i', b'abc', c_int(123))
        writer.write_char('.')
        self.assertEqual(writer.finish(), 'abc 123.')

    def test_recover_error(self):
        """Test that a failed format call leaves earlier content intact."""
        # test recovering from PyUnicodeWriter_Format() error
        writer = self.create_writer(0)
        self.writer_format(writer, b"%s ", b"Hello")

        # PyUnicodeWriter_Format() fails with an invalid format string
        with self.assertRaises(ValueError):
            self.writer_format(writer, b"%s\xff", b"World")

        # Retry PyUnicodeWriter_Format() with a valid format string
        self.writer_format(writer, b"%s.", b"World")

        self.assertEqual(writer.finish(), 'Hello World.')
1825+
1826+
16791827
if __name__ == "__main__":
16801828
unittest.main()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy