From c2f10106d82ec2a8027f8f04b8d14cd454e01304 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 07:14:55 +0000 Subject: [PATCH 1/6] json: Optimize escaping string in Encoder --- Modules/_json.c | 148 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 115 insertions(+), 33 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 89b0a41dd10acb..6819043feb1f5d 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -51,7 +51,7 @@ typedef struct _PyEncoderObject { char sort_keys; char skipkeys; int allow_nan; - PyCFunction fast_encode; + int (*fast_encode)(PyUnicodeWriter *, PyObject*); } PyEncoderObject; #define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) @@ -102,8 +102,8 @@ static PyObject * _encoded_const(PyObject *obj); static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end); -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj); static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); @@ -303,6 +303,89 @@ escape_unicode(PyObject *pystr) return rval; } +// Take a PyUnicode pystr and write an ASCII-only escaped string to writer. +static int +write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) +{ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t chars; + Py_ssize_t copy_len = 0; + const void *input; + int kind; + int ret; + unsigned char buf[12]; + + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + + ret = PyUnicodeWriter_WriteChar(writer, '"'); + if (ret) return ret; + + for (i = 0; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); + if (S_CHAR(c)) { + copy_len++; + } + else { + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + copy_len = 0; + + chars = ascii_escape_unichar(c, buf, 0); + ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); + if (ret) return ret; + } + } + + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + + return PyUnicodeWriter_WriteChar(writer, '"'); +} + +// Take a PyUnicode pystr and write an escaped string to writer. +static int +write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) +{ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t chars; + Py_ssize_t copy_len = 0; + const void *input; + int kind; + int ret; + unsigned char buf[12]; + + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + + ret = PyUnicodeWriter_WriteChar(writer, '"'); + if (ret) return ret; + + for (i = 0; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); + if (c <= 0x1f || c == '\\' || c == '"') { + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + copy_len = 0; + + chars = ascii_escape_unichar(c, buf, 0); + ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); + if (ret) return ret; + } + else { + copy_len++; + } + } + + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + return PyUnicodeWriter_WriteChar(writer, '"'); +} + static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end) { @@ -1255,8 +1338,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); - if (f == py_encode_basestring_ascii || f == py_encode_basestring) { - s->fast_encode = f; + if (f == py_encode_basestring_ascii){ + s->fast_encode = write_escaped_ascii; + } + else if (f == py_encode_basestring) { + s->fast_encode = write_escaped_unicode; } } @@ -1437,33 +1523,35 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) return PyFloat_Type.tp_repr(obj); } -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj) +static int +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { - /* Return the JSON representation of a string */ - PyObject *encoded; + /* Append stolen and then decrement its reference count */ + int rval = PyUnicodeWriter_WriteStr(writer, stolen); + Py_DECREF(stolen); + return rval; +} +static int +encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj) +{ if (s->fast_encode) { - return s->fast_encode(NULL, obj); + return s->fast_encode(writer, obj); + } + + /* Return the JSON representation of a string */ + PyObject *encoded = PyObject_CallOneArg(s->encoder, obj); + if (encoded == NULL) { + return -1; } - encoded = PyObject_CallOneArg(s->encoder, obj); if (encoded != NULL && !PyUnicode_Check(encoded)) { PyErr_Format(PyExc_TypeError, "encoder() must return a string, not %.80s", Py_TYPE(encoded)->tp_name); Py_DECREF(encoded); - return NULL; + return -1; } - return encoded; -} - -static int -_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) -{ - /* Append stolen and then decrement its reference count */ - int rval = PyUnicodeWriter_WriteStr(writer, stolen); - Py_DECREF(stolen); - return rval; + return _steal_accumulate(writer, encoded); } static int @@ -1485,10 +1573,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, return PyUnicodeWriter_WriteUTF8(writer, "false", 5); } else if (PyUnicode_Check(obj)) { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_accumulate(writer, encoded); + return encoder_write_string(s, writer, obj); } else if (PyLong_Check(obj)) { if (PyLong_CheckExact(obj)) { @@ -1577,7 +1662,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs PyObject *item_separator) { PyObject *keystr = NULL; - PyObject *encoded; + int rv; if (PyUnicode_Check(key)) { keystr = Py_NewRef(key); @@ -1617,15 +1702,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs } } - encoded = encoder_encode_string(s, keystr); + rv = encoder_write_string(s, writer, keystr); Py_DECREF(keystr); - if (encoded == NULL) { - return -1; + if (rv != 0) { + return rv; } - if (_steal_accumulate(writer, encoded) < 0) { - return -1; - } if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } From 59e5131a5e2efb25bb0239878acedc50b01d918f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 07:27:34 +0000 Subject: [PATCH 2/6] add news and whatsnew --- Doc/whatsnew/3.14.rst | 7 +++++++ .../Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9e6b69fbc05273..7b4a95a9f4fad8 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1518,6 +1518,13 @@ io :gh:`120754` and :gh:`90102`.) +json +---- + +* Improve the performance of :class:`~json.JSONEncoder` encodes strings. + (Contributed by Inada Naoki in :gh:`133186`.) + + uuid ---- diff --git a/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst b/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst new file mode 100644 index 00000000000000..1987d06c27efb3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst @@ -0,0 +1,2 @@ +Improve the performance of :class:`~json.JSONEncoder` encodes strings. +(Contributed by Inada Naoki in :gh:`133186`.) From ee1a7f693b74db1bbeeaa39ae13f4bf2f1c1191b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 20:38:03 +0900 Subject: [PATCH 3/6] add comment --- Modules/_json.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Modules/_json.c b/Modules/_json.c index 6819043feb1f5d..23ec53c19a2698 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -304,6 +304,8 @@ escape_unicode(PyObject *pystr) } // Take a PyUnicode pystr and write an ASCII-only escaped string to writer. +// Same to ascii_escape_unicode(), but write to PyUnicodeWriter instead of +// return Unicode object. static int write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) { @@ -346,6 +348,8 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) } // Take a PyUnicode pystr and write an escaped string to writer. +// Same to escape_unicode(), but write to PyUnicodeWriter instead of +// return Unicode object. static int write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) { From d026be336b7abdf9be4eb4c52ed18abbdb857278 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 22:02:01 +0900 Subject: [PATCH 4/6] apply suggested change --- Modules/_json.c | 69 ++++++++++--------------------------------------- 1 file changed, 13 insertions(+), 56 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 23ec53c19a2698..b543a764414a50 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -48,10 +48,11 @@ typedef struct _PyEncoderObject { PyObject *indent; PyObject *key_separator; PyObject *item_separator; - char sort_keys; - char skipkeys; - int allow_nan; - int (*fast_encode)(PyUnicodeWriter *, PyObject*); + bool sort_keys; + bool skipkeys; + bool allow_nan; + bool fast_encode; + bool ensure_ascii; /* used only when fast_encode == true */ } PyEncoderObject; #define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) @@ -303,55 +304,9 @@ escape_unicode(PyObject *pystr) return rval; } -// Take a PyUnicode pystr and write an ASCII-only escaped string to writer. -// Same to ascii_escape_unicode(), but write to PyUnicodeWriter instead of -// return Unicode object. -static int -write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) -{ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t chars; - Py_ssize_t copy_len = 0; - const void *input; - int kind; - int ret; - unsigned char buf[12]; - - input_chars = PyUnicode_GET_LENGTH(pystr); - input = PyUnicode_DATA(pystr); - kind = PyUnicode_KIND(pystr); - - ret = PyUnicodeWriter_WriteChar(writer, '"'); - if (ret) return ret; - - for (i = 0; i < input_chars; i++) { - Py_UCS4 c = PyUnicode_READ(kind, input, i); - if (S_CHAR(c)) { - copy_len++; - } - else { - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - copy_len = 0; - - chars = ascii_escape_unichar(c, buf, 0); - ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); - if (ret) return ret; - } - } - - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - - return PyUnicodeWriter_WriteChar(writer, '"'); -} - // Take a PyUnicode pystr and write an escaped string to writer. -// Same to escape_unicode(), but write to PyUnicodeWriter instead of -// return Unicode object. static int -write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) +write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only) { Py_ssize_t i; Py_ssize_t input_chars; @@ -371,7 +326,7 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) for (i = 0; i < input_chars; i++) { Py_UCS4 c = PyUnicode_READ(kind, input, i); - if (c <= 0x1f || c == '\\' || c == '"') { + if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) { ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); if (ret) return ret; copy_len = 0; @@ -1338,15 +1293,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->sort_keys = sort_keys; s->skipkeys = skipkeys; s->allow_nan = allow_nan; - s->fast_encode = NULL; + s->fast_encode = false; + s->ensure_ascii = false; if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); if (f == py_encode_basestring_ascii){ - s->fast_encode = write_escaped_ascii; + s->fast_encode = true; + s->ensure_ascii = true; } else if (f == py_encode_basestring) { - s->fast_encode = write_escaped_unicode; + s->fast_encode = true; } } @@ -1540,7 +1497,7 @@ static int encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj) { if (s->fast_encode) { - return s->fast_encode(writer, obj); + return write_escaped_unicode(writer, obj, s->ensure_ascii); } /* Return the JSON representation of a string */ From 8e5e00b4eb43fbecb31dce2485c36684a86592bc Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 1 May 2025 10:33:00 +0900 Subject: [PATCH 5/6] use tmp buffer --- Modules/_json.c | 120 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 97 insertions(+), 23 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index b543a764414a50..d6628fc2e871d6 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -48,11 +48,10 @@ typedef struct _PyEncoderObject { PyObject *indent; PyObject *key_separator; PyObject *item_separator; + int (*fast_encode)(PyUnicodeWriter *, PyObject *); bool sort_keys; bool skipkeys; bool allow_nan; - bool fast_encode; - bool ensure_ascii; /* used only when fast_encode == true */ } PyEncoderObject; #define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) @@ -304,18 +303,20 @@ escape_unicode(PyObject *pystr) return rval; } -// Take a PyUnicode pystr and write an escaped string to writer. +#define ESCAPE_BUF_SIZE 200 + +// Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii) static int -write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only) +write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) { Py_ssize_t i; Py_ssize_t input_chars; - Py_ssize_t chars; - Py_ssize_t copy_len = 0; + Py_ssize_t buf_len; const void *input; + Py_UCS4 c = 0; int kind; int ret; - unsigned char buf[12]; + char buf[ESCAPE_BUF_SIZE]; // avoid overhead of PyUnicodeWriter APIs input_chars = PyUnicode_GET_LENGTH(pystr); input = PyUnicode_DATA(pystr); @@ -324,27 +325,102 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only) ret = PyUnicodeWriter_WriteChar(writer, '"'); if (ret) return ret; + // Fast path for string doesn't need escape at all: e.g. "id", "name" for (i = 0; i < input_chars; i++) { + c = PyUnicode_READ(kind, input, i); + if (!S_CHAR(c)) { + break; + } + } + if (i > 0) { + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i); + if (ret) return ret; + } + if (i == input_chars) { + return PyUnicodeWriter_WriteChar(writer, '"'); + } + + buf_len = ascii_escape_unichar(c, (unsigned char*)buf, 0); + + for (i++ ; i < input_chars; i++) { Py_UCS4 c = PyUnicode_READ(kind, input, i); - if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) { - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - copy_len = 0; + if (S_CHAR(c)) { + buf[buf_len++] = c; + } + else { + buf_len = ascii_escape_unichar(c, (unsigned char*)buf, buf_len); + } - chars = ascii_escape_unichar(c, buf, 0); - ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); + if (buf_len + 12 > ESCAPE_BUF_SIZE) { + ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); if (ret) return ret; + buf_len = 0; + } + } + + assert(buf_len < ESCAPE_BUF_SIZE); + buf[buf_len++] = '"'; + return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); +} + +static int +write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) +{ + Py_ssize_t i; + Py_ssize_t input_size; + Py_ssize_t buf_len; + const unsigned char *input; + int ret; + unsigned char c = 0; + char buf[ESCAPE_BUF_SIZE]; + + // We don't need to escape non-ASCII chars. + // So we just copy UTF-8 from pystr to buf. + input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size); + + ret = PyUnicodeWriter_WriteChar(writer, '"'); + if (ret) return ret; + + // Fast path for string doesn't need escape at all: e.g. "id", "name" + for (i = 0; i < input_size; i++) { + c = input[i]; + if (c <= 0x1f || c == '\\' || c == '"') { + break; + } + } + if (i > 0) { + ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i); + if (ret) return ret; + } + if (i == input_size) { + return PyUnicodeWriter_WriteChar(writer, '"'); + } + + buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0); + + for (i++; i < input_size; i++) { + c = input[i]; + if (c <= 0x1f || c == '\\' || c == '"') { + buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len); } else { - copy_len++; + buf[buf_len++] = c; + } + + if (buf_len + 6 > ESCAPE_BUF_SIZE) { + ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); + if (ret) return ret; + buf_len = 0; } } - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - return PyUnicodeWriter_WriteChar(writer, '"'); + assert(buf_len < ESCAPE_BUF_SIZE); + buf[buf_len++] = '"'; + return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); } +#undef ESCAPE_BUF_SIZE + static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end) { @@ -1293,17 +1369,15 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->sort_keys = sort_keys; s->skipkeys = skipkeys; s->allow_nan = allow_nan; - s->fast_encode = false; - s->ensure_ascii = false; + s->fast_encode = NULL; if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); if (f == py_encode_basestring_ascii){ - s->fast_encode = true; - s->ensure_ascii = true; + s->fast_encode = write_escaped_ascii; } else if (f == py_encode_basestring) { - s->fast_encode = true; + s->fast_encode = write_escaped_unicode; } } @@ -1497,7 +1571,7 @@ static int encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj) { if (s->fast_encode) { - return write_escaped_unicode(writer, obj, s->ensure_ascii); + return s->fast_encode(writer, obj); } /* Return the JSON representation of a string */ From 19c0f1fb5747a3f927f5f505966fdb732e75d953 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 1 May 2025 07:05:50 +0000 Subject: [PATCH 6/6] use UCS4 instead of UTF8 --- Modules/_json.c | 71 +++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index d6628fc2e871d6..cd08fa688d3a52 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -303,12 +303,11 @@ escape_unicode(PyObject *pystr) return rval; } -#define ESCAPE_BUF_SIZE 200 - // Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii) static int write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) { +#define ESCAPE_BUF_SIZE 200 Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t buf_len; @@ -367,60 +366,74 @@ static int write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) { Py_ssize_t i; - Py_ssize_t input_size; - Py_ssize_t buf_len; - const unsigned char *input; + Py_ssize_t input_chars; + Py_ssize_t chars = 0; + const void *input; + int kind; int ret; - unsigned char c = 0; - char buf[ESCAPE_BUF_SIZE]; + Py_UCS4 output[ESCAPE_BUF_SIZE]; - // We don't need to escape non-ASCII chars. - // So we just copy UTF-8 from pystr to buf. - input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size); + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); ret = PyUnicodeWriter_WriteChar(writer, '"'); if (ret) return ret; // Fast path for string doesn't need escape at all: e.g. "id", "name" - for (i = 0; i < input_size; i++) { - c = input[i]; + for (i = 0; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); if (c <= 0x1f || c == '\\' || c == '"') { break; } } if (i > 0) { - ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i); + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i); if (ret) return ret; } - if (i == input_size) { + if (i == input_chars) { return PyUnicodeWriter_WriteChar(writer, '"'); } - buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0); + for (; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); - for (i++; i < input_size; i++) { - c = input[i]; - if (c <= 0x1f || c == '\\' || c == '"') { - buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len); - } - else { - buf[buf_len++] = c; + // Same to ENCODE_OUTPUT in escape_unicode + switch (c) { + case '\\': output[chars++] = '\\'; output[chars++] = c; break; + case '"': output[chars++] = '\\'; output[chars++] = c; break; + case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; + case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; + case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; + case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; + case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; + default: + if (c <= 0x1f) { + output[chars++] = '\\'; + output[chars++] = 'u'; + output[chars++] = '0'; + output[chars++] = '0'; + output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; + output[chars++] = Py_hexdigits[(c ) & 0xf]; + } else { + output[chars++] = c; + } } - if (buf_len + 6 > ESCAPE_BUF_SIZE) { - ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); + if (chars + 6 > ESCAPE_BUF_SIZE) { + ret = PyUnicodeWriter_WriteUCS4(writer, output, chars); if (ret) return ret; - buf_len = 0; + chars = 0; } } - assert(buf_len < ESCAPE_BUF_SIZE); - buf[buf_len++] = '"'; - return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); + assert(chars < ESCAPE_BUF_SIZE); + output[chars++] = '"'; + return PyUnicodeWriter_WriteUCS4(writer, output, chars); } - #undef ESCAPE_BUF_SIZE + static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end) { pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy