diff --git a/Misc/NEWS.d/next/Library/2025-06-11-15-08-02.gh-issue-135336.6Gq6MI.rst b/Misc/NEWS.d/next/Library/2025-06-11-15-08-02.gh-issue-135336.6Gq6MI.rst new file mode 100644 index 00000000000000..8a1d492ff08944 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-11-15-08-02.gh-issue-135336.6Gq6MI.rst @@ -0,0 +1 @@ +:mod:`json` now encodes strings up to 2.2x faster if they consist solely of characters that don’t require escaping. diff --git a/Modules/_json.c b/Modules/_json.c index 7580b589e2d937..1f64e04a37583a 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -51,7 +51,7 @@ typedef struct _PyEncoderObject { char sort_keys; char skipkeys; int allow_nan; - PyCFunction fast_encode; + int (*fast_encode)(PyUnicodeWriter *, PyObject *); } PyEncoderObject; #define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) @@ -102,8 +102,10 @@ static PyObject * _encoded_const(PyObject *obj); static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end); -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen); +static int +encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj); static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); @@ -146,22 +148,11 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars) return chars; } -static PyObject * -ascii_escape_unicode(PyObject *pystr) +static Py_ssize_t +ascii_escape_size(const void *input, int kind, Py_ssize_t input_chars) { - /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */ Py_ssize_t i; - Py_ssize_t input_chars; Py_ssize_t output_size; - Py_ssize_t chars; - PyObject *rval; - const void *input; - Py_UCS1 *output; - int kind; - - input_chars = PyUnicode_GET_LENGTH(pystr); - input = PyUnicode_DATA(pystr); - kind = PyUnicode_KIND(pystr); /* Compute the output size */ for (i = 0, output_size = 2; i < input_chars; i++) { @@ -181,11 +172,22 @@ ascii_escape_unicode(PyObject *pystr) } if (output_size > PY_SSIZE_T_MAX - d) { PyErr_SetString(PyExc_OverflowError, "string is too long to escape"); - return NULL; + return -1; } output_size += d; } + return output_size; +} + +static PyObject * +ascii_escape_unicode_and_size(const void *input, int kind, Py_ssize_t input_chars, Py_ssize_t output_size) +{ + Py_ssize_t i; + Py_ssize_t chars; + PyObject *rval; + Py_UCS1 *output; + rval = PyUnicode_New(output_size, 127); if (rval == NULL) { return NULL; @@ -210,23 +212,66 @@ ascii_escape_unicode(PyObject *pystr) } static PyObject * -escape_unicode(PyObject *pystr) +ascii_escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */ + Py_ssize_t input_chars; + const void *input; + int kind; + + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + + Py_ssize_t output_size = ascii_escape_size(input, kind, input_chars); + if (output_size < 0) { + return NULL; + } + + return ascii_escape_unicode_and_size(input, kind, input_chars, output_size); +} + +static int +write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) { - /* Take a PyUnicode pystr and return a new escaped PyUnicode */ - Py_ssize_t i; Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t chars; - PyObject *rval; const void *input; int kind; - Py_UCS4 maxchar; - maxchar = PyUnicode_MAX_CHAR_VALUE(pystr); input_chars = PyUnicode_GET_LENGTH(pystr); input = PyUnicode_DATA(pystr); kind = PyUnicode_KIND(pystr); + Py_ssize_t output_size = ascii_escape_size(input, kind, input_chars); + if (output_size < 0) { + return -1; + } + + if (output_size == input_chars + 2) { + /* No need to escape anything */ + if (PyUnicodeWriter_WriteChar(writer, '"') < 0) { + return -1; + } + if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) { + return -1; + } + return PyUnicodeWriter_WriteChar(writer, '"'); + } + + PyObject *rval = ascii_escape_unicode_and_size(input, kind, input_chars, output_size); + if (rval == NULL) { + return -1; + } + + return _steal_accumulate(writer, rval); +} + +static Py_ssize_t +escape_size(const void *input, int kind, Py_ssize_t input_chars) +{ + Py_ssize_t i; + Py_ssize_t output_size; + /* Compute the output size */ for (i = 0, output_size = 2; i < input_chars; i++) { Py_UCS4 c = PyUnicode_READ(kind, input, i); @@ -244,11 +289,21 @@ escape_unicode(PyObject *pystr) } if (output_size > PY_SSIZE_T_MAX - d) { PyErr_SetString(PyExc_OverflowError, "string is too long to escape"); - return NULL; + return -1; } output_size += d; } + return output_size; +} + +static PyObject * +escape_unicode_and_size(const void *input, int kind, Py_UCS4 maxchar, Py_ssize_t input_chars, Py_ssize_t output_size) +{ + Py_ssize_t i; + Py_ssize_t chars; + PyObject *rval; + rval = PyUnicode_New(output_size, maxchar); if (rval == NULL) return NULL; @@ -303,6 +358,65 @@ escape_unicode(PyObject *pystr) return rval; } +static PyObject * +escape_unicode(PyObject *pystr) +{ + /* Take a PyUnicode pystr and return a new escaped PyUnicode */ + Py_ssize_t input_chars; + const void *input; + int kind; + Py_UCS4 maxchar; + + maxchar = PyUnicode_MAX_CHAR_VALUE(pystr); + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + + Py_ssize_t output_size = escape_size(input, kind, input_chars); + if (output_size < 0) { + return NULL; + } + + return escape_unicode_and_size(input, kind, maxchar, input_chars, output_size); +} + +static int +write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) +{ + Py_ssize_t input_chars; + const void *input; + int kind; + Py_UCS4 maxchar; + + maxchar = PyUnicode_MAX_CHAR_VALUE(pystr); + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + + Py_ssize_t output_size = escape_size(input, kind, input_chars); + if (output_size < 0) { + return -1; + } + + if (output_size == input_chars + 2) { + /* No need to escape anything */ + if (PyUnicodeWriter_WriteChar(writer, '"') < 0) { + return -1; + } + if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) { + return -1; + } + return PyUnicodeWriter_WriteChar(writer, '"'); + } + + PyObject *rval = escape_unicode_and_size(input, kind, maxchar, input_chars, output_size); + if (rval == NULL) { + return -1; + } + + return _steal_accumulate(writer, rval); +} + static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end) { @@ -1256,8 +1370,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); - if (f == py_encode_basestring_ascii || f == py_encode_basestring) { - s->fast_encode = f; + if (f == py_encode_basestring_ascii) { + s->fast_encode = write_escaped_ascii; + } + else if (f == py_encode_basestring) { + s->fast_encode = write_escaped_unicode; } } @@ -1438,24 +1555,27 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) return PyFloat_Type.tp_repr(obj); } -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj) +static int +encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj) { /* Return the JSON representation of a string */ PyObject *encoded; if (s->fast_encode) { - return s->fast_encode(NULL, obj); + return s->fast_encode(writer, obj); } encoded = PyObject_CallOneArg(s->encoder, obj); - if (encoded != NULL && !PyUnicode_Check(encoded)) { + if (encoded == NULL) { + return -1; + } + if (!PyUnicode_Check(encoded)) { PyErr_Format(PyExc_TypeError, "encoder() must return a string, not %.80s", Py_TYPE(encoded)->tp_name); Py_DECREF(encoded); - return NULL; + return -1; } - return encoded; + return _steal_accumulate(writer, encoded); } static int @@ -1486,10 +1606,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, return PyUnicodeWriter_WriteASCII(writer, "false", 5); } else if (PyUnicode_Check(obj)) { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_accumulate(writer, encoded); + return encoder_write_string(s, writer, obj); } else if (PyLong_Check(obj)) { if (PyLong_CheckExact(obj)) { @@ -1578,7 +1695,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs PyObject *item_separator) { PyObject *keystr = NULL; - PyObject *encoded; + int rv; if (PyUnicode_Check(key)) { keystr = Py_NewRef(key); @@ -1624,13 +1741,10 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs } } - encoded = encoder_encode_string(s, keystr); + rv = encoder_write_string(s, writer, keystr); Py_DECREF(keystr); - if (encoded == NULL) { - return -1; - } - if (_steal_accumulate(writer, encoded) < 0) { + if (rv < 0) { return -1; } if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: