From c2f10106d82ec2a8027f8f04b8d14cd454e01304 Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Wed, 30 Apr 2025 07:14:55 +0000
Subject: [PATCH 1/6] json: Optimize escaping string in Encoder

---
 Modules/_json.c | 148 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 115 insertions(+), 33 deletions(-)

diff --git a/Modules/_json.c b/Modules/_json.c
index 89b0a41dd10acb..6819043feb1f5d 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
     char sort_keys;
     char skipkeys;
     int allow_nan;
-    PyCFunction fast_encode;
+    int (*fast_encode)(PyUnicodeWriter *, PyObject*);
 } PyEncoderObject;
 
 #define PyEncoderObject_CAST(op)    ((PyEncoderObject *)(op))
@@ -102,8 +102,8 @@ static PyObject *
 _encoded_const(PyObject *obj);
 static void
 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
-static PyObject *
-encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj);
 static PyObject *
 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
 
@@ -303,6 +303,89 @@ escape_unicode(PyObject *pystr)
     return rval;
 }
 
+// Take a PyUnicode pystr and write an ASCII-only escaped string to writer.
+static int
+write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
+{
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t chars;
+    Py_ssize_t copy_len = 0;
+    const void *input;
+    int kind;
+    int ret;
+    unsigned char buf[12];
+
+    input_chars = PyUnicode_GET_LENGTH(pystr);
+    input = PyUnicode_DATA(pystr);
+    kind = PyUnicode_KIND(pystr);
+
+    ret = PyUnicodeWriter_WriteChar(writer, '"');
+    if (ret) return ret;
+
+    for (i = 0; i < input_chars; i++) {
+        Py_UCS4 c = PyUnicode_READ(kind, input, i);
+        if (S_CHAR(c)) {
+            copy_len++;
+        }
+        else {
+            ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
+            if (ret) return ret;
+            copy_len = 0;
+
+            chars = ascii_escape_unichar(c, buf, 0);
+            ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
+            if (ret) return ret;
+        }
+    }
+
+    ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
+    if (ret) return ret;
+
+    return PyUnicodeWriter_WriteChar(writer, '"');
+}
+
+// Take a PyUnicode pystr and write an escaped string to writer.
+static int
+write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
+{
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t chars;
+    Py_ssize_t copy_len = 0;
+    const void *input;
+    int kind;
+    int ret;
+    unsigned char buf[12];
+
+    input_chars = PyUnicode_GET_LENGTH(pystr);
+    input = PyUnicode_DATA(pystr);
+    kind = PyUnicode_KIND(pystr);
+
+    ret = PyUnicodeWriter_WriteChar(writer, '"');
+    if (ret) return ret;
+
+    for (i = 0; i < input_chars; i++) {
+        Py_UCS4 c = PyUnicode_READ(kind, input, i);
+        if (c <= 0x1f || c == '\\' || c == '"') {
+            ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
+            if (ret) return ret;
+            copy_len = 0;
+
+            chars = ascii_escape_unichar(c, buf, 0);
+            ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
+            if (ret) return ret;
+        }
+        else {
+            copy_len++;
+        }
+    }
+
+    ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
+    if (ret) return ret;
+    return PyUnicodeWriter_WriteChar(writer, '"');
+}
+
 static void
 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
 {
@@ -1255,8 +1338,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 
     if (PyCFunction_Check(s->encoder)) {
         PyCFunction f = PyCFunction_GetFunction(s->encoder);
-        if (f == py_encode_basestring_ascii || f == py_encode_basestring) {
-            s->fast_encode = f;
+        if (f == py_encode_basestring_ascii){
+            s->fast_encode = write_escaped_ascii;
+        }
+        else if (f == py_encode_basestring) {
+            s->fast_encode = write_escaped_unicode;
         }
     }
 
@@ -1437,33 +1523,35 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
     return PyFloat_Type.tp_repr(obj);
 }
 
-static PyObject *
-encoder_encode_string(PyEncoderObject *s, PyObject *obj)
+static int
+_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
 {
-    /* Return the JSON representation of a string */
-    PyObject *encoded;
+    /* Append stolen and then decrement its reference count */
+    int rval = PyUnicodeWriter_WriteStr(writer, stolen);
+    Py_DECREF(stolen);
+    return rval;
+}
 
+static int
+encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
+{
     if (s->fast_encode) {
-        return s->fast_encode(NULL, obj);
+        return s->fast_encode(writer, obj);
+    }
+
+    /* Return the JSON representation of a string */
+    PyObject *encoded = PyObject_CallOneArg(s->encoder, obj);
+    if (encoded == NULL) {
+        return -1;
     }
-    encoded = PyObject_CallOneArg(s->encoder, obj);
     if (encoded != NULL && !PyUnicode_Check(encoded)) {
         PyErr_Format(PyExc_TypeError,
                      "encoder() must return a string, not %.80s",
                      Py_TYPE(encoded)->tp_name);
         Py_DECREF(encoded);
-        return NULL;
+        return -1;
     }
-    return encoded;
-}
-
-static int
-_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
-{
-    /* Append stolen and then decrement its reference count */
-    int rval = PyUnicodeWriter_WriteStr(writer, stolen);
-    Py_DECREF(stolen);
-    return rval;
+    return _steal_accumulate(writer, encoded);
 }
 
 static int
@@ -1485,10 +1573,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
       return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
     }
     else if (PyUnicode_Check(obj)) {
-        PyObject *encoded = encoder_encode_string(s, obj);
-        if (encoded == NULL)
-            return -1;
-        return _steal_accumulate(writer, encoded);
+        return encoder_write_string(s, writer, obj);
     }
     else if (PyLong_Check(obj)) {
         if (PyLong_CheckExact(obj)) {
@@ -1577,7 +1662,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
                          PyObject *item_separator)
 {
     PyObject *keystr = NULL;
-    PyObject *encoded;
+    int rv;
 
     if (PyUnicode_Check(key)) {
         keystr = Py_NewRef(key);
@@ -1617,15 +1702,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
         }
     }
 
-    encoded = encoder_encode_string(s, keystr);
+    rv = encoder_write_string(s, writer, keystr);
     Py_DECREF(keystr);
-    if (encoded == NULL) {
-        return -1;
+    if (rv != 0) {
+        return rv;
     }
 
-    if (_steal_accumulate(writer, encoded) < 0) {
-        return -1;
-    }
     if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
         return -1;
     }

From 59e5131a5e2efb25bb0239878acedc50b01d918f Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Wed, 30 Apr 2025 07:27:34 +0000
Subject: [PATCH 2/6] add news and whatsnew

---
 Doc/whatsnew/3.14.rst                                      | 7 +++++++
 .../Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst | 2 ++
 2 files changed, 9 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst

diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 9e6b69fbc05273..7b4a95a9f4fad8 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -1518,6 +1518,13 @@ io
   :gh:`120754` and :gh:`90102`.)
 
 
+json
+----
+
+* Improve the performance of :class:`~json.JSONEncoder` when encoding strings.
+  (Contributed by Inada Naoki in :gh:`133186`.)
+
+
 uuid
 ----
 
diff --git a/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst b/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst
new file mode 100644
index 00000000000000..1987d06c27efb3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst
@@ -0,0 +1,2 @@
+Improve the performance of :class:`~json.JSONEncoder` when encoding strings.
+(Contributed by Inada Naoki in :gh:`133186`.)

From ee1a7f693b74db1bbeeaa39ae13f4bf2f1c1191b Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Wed, 30 Apr 2025 20:38:03 +0900
Subject: [PATCH 3/6] add comment

---
 Modules/_json.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Modules/_json.c b/Modules/_json.c
index 6819043feb1f5d..23ec53c19a2698 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -304,6 +304,8 @@ escape_unicode(PyObject *pystr)
 }
 
 // Take a PyUnicode pystr and write an ASCII-only escaped string to writer.
+// Same to ascii_escape_unicode(), but write to PyUnicodeWriter instead of
+// return Unicode object.
 static int
 write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
 {
@@ -346,6 +348,8 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
 }
 
 // Take a PyUnicode pystr and write an escaped string to writer.
+// Same to escape_unicode(), but write to PyUnicodeWriter instead of
+// return Unicode object.
 static int
 write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
 {

From d026be336b7abdf9be4eb4c52ed18abbdb857278 Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Wed, 30 Apr 2025 22:02:01 +0900
Subject: [PATCH 4/6] apply suggested change

---
 Modules/_json.c | 69 ++++++++++---------------------------------------
 1 file changed, 13 insertions(+), 56 deletions(-)

diff --git a/Modules/_json.c b/Modules/_json.c
index 23ec53c19a2698..b543a764414a50 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -48,10 +48,11 @@ typedef struct _PyEncoderObject {
     PyObject *indent;
     PyObject *key_separator;
     PyObject *item_separator;
-    char sort_keys;
-    char skipkeys;
-    int allow_nan;
-    int (*fast_encode)(PyUnicodeWriter *, PyObject*);
+    bool sort_keys;
+    bool skipkeys;
+    bool allow_nan;
+    bool fast_encode;
+    bool ensure_ascii; /* used only when fast_encode == true */
 } PyEncoderObject;
 
 #define PyEncoderObject_CAST(op)    ((PyEncoderObject *)(op))
@@ -303,55 +304,9 @@ escape_unicode(PyObject *pystr)
     return rval;
 }
 
-// Take a PyUnicode pystr and write an ASCII-only escaped string to writer.
-// Same to ascii_escape_unicode(), but write to PyUnicodeWriter instead of
-// return Unicode object.
-static int
-write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
-{
-    Py_ssize_t i;
-    Py_ssize_t input_chars;
-    Py_ssize_t chars;
-    Py_ssize_t copy_len = 0;
-    const void *input;
-    int kind;
-    int ret;
-    unsigned char buf[12];
-
-    input_chars = PyUnicode_GET_LENGTH(pystr);
-    input = PyUnicode_DATA(pystr);
-    kind = PyUnicode_KIND(pystr);
-
-    ret = PyUnicodeWriter_WriteChar(writer, '"');
-    if (ret) return ret;
-
-    for (i = 0; i < input_chars; i++) {
-        Py_UCS4 c = PyUnicode_READ(kind, input, i);
-        if (S_CHAR(c)) {
-            copy_len++;
-        }
-        else {
-            ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
-            if (ret) return ret;
-            copy_len = 0;
-
-            chars = ascii_escape_unichar(c, buf, 0);
-            ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
-            if (ret) return ret;
-        }
-    }
-
-    ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
-    if (ret) return ret;
-
-    return PyUnicodeWriter_WriteChar(writer, '"');
-}
-
 // Take a PyUnicode pystr and write an escaped string to writer.
-// Same to escape_unicode(), but write to PyUnicodeWriter instead of
-// return Unicode object.
 static int
-write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
+write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only)
 {
     Py_ssize_t i;
     Py_ssize_t input_chars;
@@ -371,7 +326,7 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
 
     for (i = 0; i < input_chars; i++) {
         Py_UCS4 c = PyUnicode_READ(kind, input, i);
-        if (c <= 0x1f || c == '\\' || c == '"') {
+        if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) {
             ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
             if (ret) return ret;
             copy_len = 0;
@@ -1338,15 +1293,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     s->sort_keys = sort_keys;
     s->skipkeys = skipkeys;
     s->allow_nan = allow_nan;
-    s->fast_encode = NULL;
+    s->fast_encode = false;
+    s->ensure_ascii = false;
 
     if (PyCFunction_Check(s->encoder)) {
         PyCFunction f = PyCFunction_GetFunction(s->encoder);
         if (f == py_encode_basestring_ascii){
-            s->fast_encode = write_escaped_ascii;
+            s->fast_encode = true;
+            s->ensure_ascii = true;
         }
         else if (f == py_encode_basestring) {
-            s->fast_encode = write_escaped_unicode;
+            s->fast_encode = true;
         }
     }
 
@@ -1540,7 +1497,7 @@ static int
 encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
 {
     if (s->fast_encode) {
-        return s->fast_encode(writer, obj);
+        return write_escaped_unicode(writer, obj, s->ensure_ascii);
     }
 
     /* Return the JSON representation of a string */

From 8e5e00b4eb43fbecb31dce2485c36684a86592bc Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Thu, 1 May 2025 10:33:00 +0900
Subject: [PATCH 5/6] use tmp buffer

---
 Modules/_json.c | 120 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 97 insertions(+), 23 deletions(-)

diff --git a/Modules/_json.c b/Modules/_json.c
index b543a764414a50..d6628fc2e871d6 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -48,11 +48,10 @@ typedef struct _PyEncoderObject {
     PyObject *indent;
     PyObject *key_separator;
     PyObject *item_separator;
+    int (*fast_encode)(PyUnicodeWriter *, PyObject *);
     bool sort_keys;
     bool skipkeys;
     bool allow_nan;
-    bool fast_encode;
-    bool ensure_ascii; /* used only when fast_encode == true */
 } PyEncoderObject;
 
 #define PyEncoderObject_CAST(op)    ((PyEncoderObject *)(op))
@@ -304,18 +303,20 @@ escape_unicode(PyObject *pystr)
     return rval;
 }
 
-// Take a PyUnicode pystr and write an escaped string to writer.
+#define ESCAPE_BUF_SIZE 200
+
+// Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii)
 static int
-write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only)
+write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
 {
     Py_ssize_t i;
     Py_ssize_t input_chars;
-    Py_ssize_t chars;
-    Py_ssize_t copy_len = 0;
+    Py_ssize_t buf_len;
     const void *input;
+    Py_UCS4 c = 0;
     int kind;
     int ret;
-    unsigned char buf[12];
+    char buf[ESCAPE_BUF_SIZE];  // avoid overhead of PyUnicodeWriter APIs
 
     input_chars = PyUnicode_GET_LENGTH(pystr);
     input = PyUnicode_DATA(pystr);
@@ -324,27 +325,102 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only)
     ret = PyUnicodeWriter_WriteChar(writer, '"');
     if (ret) return ret;
 
+    // Fast path for string doesn't need escape at all: e.g. "id", "name"
     for (i = 0; i < input_chars; i++) {
+        c = PyUnicode_READ(kind, input, i);
+        if (!S_CHAR(c)) {
+            break;
+        }
+    }
+    if (i > 0) {
+        ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i);
+        if (ret) return ret;
+    }
+    if (i == input_chars) {
+        return PyUnicodeWriter_WriteChar(writer, '"');
+    }
+
+    buf_len = ascii_escape_unichar(c, (unsigned char*)buf, 0);
+
+    for (i++ ; i < input_chars; i++) {
         Py_UCS4 c = PyUnicode_READ(kind, input, i);
-        if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) {
-            ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
-            if (ret) return ret;
-            copy_len = 0;
+        if (S_CHAR(c)) {
+            buf[buf_len++] = c;
+        }
+        else {
+            buf_len = ascii_escape_unichar(c, (unsigned char*)buf, buf_len);
+        }
 
-            chars = ascii_escape_unichar(c, buf, 0);
-            ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
+        if (buf_len + 12 > ESCAPE_BUF_SIZE) {
+            ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
             if (ret) return ret;
+            buf_len = 0;
+        }
+    }
+
+    assert(buf_len < ESCAPE_BUF_SIZE);
+    buf[buf_len++] = '"';
+    return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
+}
+
+static int
+write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
+{
+    Py_ssize_t i;
+    Py_ssize_t input_size;
+    Py_ssize_t buf_len;
+    const unsigned char *input;
+    int ret;
+    unsigned char c = 0;
+    char buf[ESCAPE_BUF_SIZE];
+
+    // We don't need to escape non-ASCII chars.
+    // So we just copy UTF-8 from pystr to buf.
+    input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size);
+
+    ret = PyUnicodeWriter_WriteChar(writer, '"');
+    if (ret) return ret;
+
+    // Fast path for string doesn't need escape at all: e.g. "id", "name"
+    for (i = 0; i < input_size; i++) {
+        c = input[i];
+        if (c <= 0x1f || c == '\\' || c == '"') {
+            break;
+        }
+    }
+    if (i > 0) {
+        ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i);
+        if (ret) return ret;
+    }
+    if (i == input_size) {
+        return PyUnicodeWriter_WriteChar(writer, '"');
+    }
+
+    buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0);
+
+    for (i++; i < input_size; i++) {
+        c = input[i];
+        if (c <= 0x1f || c == '\\' || c == '"') {
+            buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len);
         }
         else {
-            copy_len++;
+            buf[buf_len++] = c;
+        }
+
+        if (buf_len + 6 > ESCAPE_BUF_SIZE) {
+            ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
+            if (ret) return ret;
+            buf_len = 0;
         }
     }
 
-    ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
-    if (ret) return ret;
-    return PyUnicodeWriter_WriteChar(writer, '"');
+    assert(buf_len < ESCAPE_BUF_SIZE);
+    buf[buf_len++] = '"';
+    return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
 }
 
+#undef ESCAPE_BUF_SIZE
+
 static void
 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
 {
@@ -1293,17 +1369,15 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     s->sort_keys = sort_keys;
     s->skipkeys = skipkeys;
     s->allow_nan = allow_nan;
-    s->fast_encode = false;
-    s->ensure_ascii = false;
+    s->fast_encode = NULL;
 
     if (PyCFunction_Check(s->encoder)) {
         PyCFunction f = PyCFunction_GetFunction(s->encoder);
         if (f == py_encode_basestring_ascii){
-            s->fast_encode = true;
-            s->ensure_ascii = true;
+            s->fast_encode = write_escaped_ascii;
         }
         else if (f == py_encode_basestring) {
-            s->fast_encode = true;
+            s->fast_encode = write_escaped_unicode;
         }
     }
 
@@ -1497,7 +1571,7 @@ static int
 encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
 {
     if (s->fast_encode) {
-        return write_escaped_unicode(writer, obj, s->ensure_ascii);
+        return s->fast_encode(writer, obj);
     }
 
     /* Return the JSON representation of a string */

From 19c0f1fb5747a3f927f5f505966fdb732e75d953 Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Thu, 1 May 2025 07:05:50 +0000
Subject: [PATCH 6/6] use UCS4 instead of UTF8

---
 Modules/_json.c | 71 +++++++++++++++++++++++++++++--------------------
 1 file changed, 42 insertions(+), 29 deletions(-)

diff --git a/Modules/_json.c b/Modules/_json.c
index d6628fc2e871d6..cd08fa688d3a52 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -303,12 +303,11 @@ escape_unicode(PyObject *pystr)
     return rval;
 }
 
-#define ESCAPE_BUF_SIZE 200
-
 // Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii)
 static int
 write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
 {
+#define ESCAPE_BUF_SIZE 200
     Py_ssize_t i;
     Py_ssize_t input_chars;
     Py_ssize_t buf_len;
@@ -367,60 +366,74 @@ static int
 write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
 {
     Py_ssize_t i;
-    Py_ssize_t input_size;
-    Py_ssize_t buf_len;
-    const unsigned char *input;
+    Py_ssize_t input_chars;
+    Py_ssize_t chars = 0;
+    const void *input;
+    int kind;
     int ret;
-    unsigned char c = 0;
-    char buf[ESCAPE_BUF_SIZE];
+    Py_UCS4 output[ESCAPE_BUF_SIZE];
 
-    // We don't need to escape non-ASCII chars.
-    // So we just copy UTF-8 from pystr to buf.
-    input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size);
+    input_chars = PyUnicode_GET_LENGTH(pystr);
+    input = PyUnicode_DATA(pystr);
+    kind = PyUnicode_KIND(pystr);
 
     ret = PyUnicodeWriter_WriteChar(writer, '"');
     if (ret) return ret;
 
     // Fast path for string doesn't need escape at all: e.g. "id", "name"
-    for (i = 0; i < input_size; i++) {
-        c = input[i];
+    for (i = 0; i < input_chars; i++) {
+        Py_UCS4 c = PyUnicode_READ(kind, input, i);
         if (c <= 0x1f || c == '\\' || c == '"') {
             break;
         }
     }
     if (i > 0) {
-        ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i);
+        ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i);
         if (ret) return ret;
     }
-    if (i == input_size) {
+    if (i == input_chars) {
         return PyUnicodeWriter_WriteChar(writer, '"');
     }
 
-    buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0);
+    for (; i < input_chars; i++) {
+        Py_UCS4 c = PyUnicode_READ(kind, input, i);
 
-    for (i++; i < input_size; i++) {
-        c = input[i];
-        if (c <= 0x1f || c == '\\' || c == '"') {
-            buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len);
-        }
-        else {
-            buf[buf_len++] = c;
+        // Same as ENCODE_OUTPUT in escape_unicode
+        switch (c) {
+        case '\\': output[chars++] = '\\'; output[chars++] = c; break;
+        case '"':  output[chars++] = '\\'; output[chars++] = c; break;
+        case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break;
+        case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break;
+        case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break;
+        case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break;
+        case '\t': output[chars++] = '\\'; output[chars++] = 't'; break;
+        default:
+            if (c <= 0x1f) {
+                output[chars++] = '\\';
+                output[chars++] = 'u';
+                output[chars++] = '0';
+                output[chars++] = '0';
+                output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
+                output[chars++] = Py_hexdigits[(c     ) & 0xf];
+            } else {
+                output[chars++] = c;
+            }
         }
 
-        if (buf_len + 6 > ESCAPE_BUF_SIZE) {
-            ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
+        if (chars + 6 > ESCAPE_BUF_SIZE) {
+            ret = PyUnicodeWriter_WriteUCS4(writer, output, chars);
             if (ret) return ret;
-            buf_len = 0;
+            chars = 0;
         }
     }
 
-    assert(buf_len < ESCAPE_BUF_SIZE);
-    buf[buf_len++] = '"';
-    return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len);
+    assert(chars < ESCAPE_BUF_SIZE);
+    output[chars++] = '"';
+    return PyUnicodeWriter_WriteUCS4(writer, output, chars);
 }
-
 #undef ESCAPE_BUF_SIZE
 
+
 static void
 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
 {

<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>pFad - Phonifier reborn</title>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
</head>
<body>
<h1>Pfad - The Proxy pFad of &#169; 2024 Garber Painting. All rights reserved.</h1>


<!-- Disclaimer -->
<p>Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.</p>
<br>
<p>Alternative Proxies:</p><p><a href="http://rainy.clevelandohioweatherforecast.com/php-proxy/index.php?q=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/133186.patch" target="_blank">Alternative Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/index.php?u=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/133186.patch" target="_blank">pFad Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/v3index.php?u=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/133186.patch" target="_blank">pFad v3 Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/v4index.php?u=https://patch-diff.githubusercontent.com/raw/python/cpython/pull/133186.patch" target="_blank">pFad v4 Proxy</a></p></body>
</html>