From 13286d116667d6ef3c1bc9a4dd7cfdeeb710b5ae Mon Sep 17 00:00:00 2001 From: Masayuki Moriyama Date: Fri, 3 Nov 2023 21:39:48 +0900 Subject: [PATCH 1/3] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds when encoding Unicode combining character sequence. This bug ocurs the following error: $ python3 -c "print('\u304b\u309a'.encode('iso2022_jp_2004'))" Traceback (most recent call last): File "", line 1, in UnicodeEncodeError: 'iso2022_jp_2004' codec can't encode character '\u309a' in position 1: illegal multibyte sequence This commit fixes the out-of-bounds read. --- Modules/cjkcodecs/_codecs_iso2022.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 86bb73b982a551..e8835ad0909633 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -207,8 +207,9 @@ ENCODER(iso2022) encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { + Py_UCS4 buf[2] = {c, 0}; Py_ssize_t length = 1; - encoded = dsg->encoder(codec, &c, &length); + encoded = dsg->encoder(codec, buf, &length); if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ @@ -217,9 +218,11 @@ ENCODER(iso2022) return MBERR_TOOFEW; length = -1; } - else + else { + buf[1] = INCHAR2; length = 2; - encoded = dsg->encoder(codec, &c, &length); + } + encoded = dsg->encoder(codec, buf, &length); if (encoded != MAP_UNMAPPABLE) { insize = length; break; From ddff542d84d24c82fd23c4137e2c6362c617d22c Mon Sep 17 00:00:00 2001 From: Masayuki Moriyama Date: Fri, 3 Nov 2023 21:40:01 +0900 Subject: [PATCH 2/3] gh-101180: Add test for iso2022_jp_3 and iso2022_jp_2004 codecs iso2022_jp_3 and iso2022_jp_2004 are upward compatible with iso2022_jp. In addition to testing iso2022_jp, we will test the following characters added in iso2022_jp_3 and iso2022_jp_2004. JIS X 0213 Unicode ---------------- --------------------------------------------- Plane 1 \x2E\x23 U+3402 Basic Multilingual Plane Plane 1 \x2E\x22 U+2000B Supplementary Ideographic Plane Plane 1 \x24\x77 U+304B U+309A Combining Character Suqence Plane 2 \x21\x22 U+4E02 Basic Multilingual Plane Plane 2 \x7E\x76 U+2A6B2 Supplementary Ideographic Plane The difference between iso2022_jp_3 and iso2022_jp_2004 is the difference between JIS X 0213:2000 and JIS X 0213:2004. Tests the following a character added from JIS X 0213:2000 to JIS X 0213:2004. JIS X 0213:2004 Unicode ---------------- ------- Plane 1 \x2E\x21 U+4FF1 Escape sequence to designate JIS X 0213 character set to G0: character set ESC sequence ----------------------- --------------------------- JIS X 0213:2000 Plane 1 ESC 2/4 2/8 4/15 ESC $ ( O JIS X 0213:2000 Plane 2 ESC 2/4 2/8 5/0 ESC $ ( P JIS X 0213:2004 Plane 1 ESC 2/4 2/8 5/1 ESC $ ( Q JIS X 0213:2004 Plane 2 ESC 2/4 2/8 5/0 ESC $ ( P --- Lib/test/test_codecencodings_iso2022.py | 46 +++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py index 00ea1c39dd6fb6..027dbecc6134df 100644 --- a/Lib/test/test_codecencodings_iso2022.py +++ b/Lib/test/test_codecencodings_iso2022.py @@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase): (b'ab\x1BNdef', 'replace', 'abdef'), ) +class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_3' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'), + ('ab\u4FF1def', 'replace', b'ab?def'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(O\x29\x28\x1B(Bℜ\x1B$(O\x29\x32\x1B(B = ⟨ሴ⟩' + ) + +class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_2004' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(Q\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'), + ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(Q\x29\x28\x1B(Bℜ\x1B$(Q\x29\x32\x1B(B = ⟨ሴ⟩' + ) + class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase): encoding = 'iso2022_kr' tstring = multibytecodec_support.load_teststring('iso2022_kr') From 59e3e6f8f2b5a09b3e9f958b701fba4b479e0481 Mon Sep 17 00:00:00 2001 From: Masayuki Moriyama Date: Fri, 3 Nov 2023 21:40:08 +0900 Subject: [PATCH 3/3] gh-101180: Add NEWS --- .../2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst new file mode 100644 index 00000000000000..268a3d310f2b49 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst @@ -0,0 +1 @@ +Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy