From 8f956aa989893952d40b5c7393254b38a61d0456 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:24:34 +0200 Subject: [PATCH] gh-125196: Use PyUnicodeWriter for JSON encoder Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. --- Modules/_json.c | 87 ++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 9e29de0f22465f..ce0093ab431d05 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyEncoderObject *self); static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); static PyObject * _encoded_const(PyObject *obj); static void @@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj, *result; + PyObject *obj; Py_ssize_t indent_level; - _PyUnicodeWriter writer; if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, - &obj, &indent_level)) + &obj, &indent_level)) return NULL; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } PyObject *newline_indent = NULL; if (self->indent != Py_None) { newline_indent = _create_newline_indent(self->indent, indent_level); if (newline_indent == NULL) { - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } } - if (encoder_listencode_obj(self, &writer, obj, newline_indent)) { - _PyUnicodeWriter_Dealloc(&writer); + if (encoder_listencode_obj(self, writer, obj, newline_indent)) { + PyUnicodeWriter_Discard(writer); Py_XDECREF(newline_indent); return NULL; } Py_XDECREF(newline_indent); - result = PyTuple_New(1); - if (result == NULL || - PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) { - Py_XDECREF(result); + PyObject *str = PyUnicodeWriter_Finish(writer); + if (str == NULL) { return NULL; } + PyObject *result = PyTuple_Pack(1, str); + Py_DECREF(str); return result; } @@ -1370,16 +1371,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = _PyUnicodeWriter_WriteStr(writer, stolen); + int rval = PyUnicodeWriter_WriteStr(writer, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent) { /* Encode Python object obj to a JSON term */ @@ -1387,13 +1388,13 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4); + return PyUnicodeWriter_WriteUTF8(writer, "null", 4); } else if (obj == Py_True) { - return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4); + return PyUnicodeWriter_WriteUTF8(writer, "true", 4); } else if (obj == Py_False) { - return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5); + return PyUnicodeWriter_WriteUTF8(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, return _steal_accumulate(writer, encoded); } else if (PyLong_Check(obj)) { + if (PyLong_CheckExact(obj)) { + // Fast-path for exact integers + return PyUnicodeWriter_WriteRepr(writer, obj); + } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) return -1; @@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, +encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first, PyObject *dct, PyObject *key, PyObject *value, PyObject *newline_indent, PyObject *item_separator) @@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir *first = false; } else { - if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { @@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir } static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent) { /* Encode Python dict dct a JSON term */ @@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *new_newline_indent = NULL; PyObject *separator_indent = NULL; - if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ - return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + if (PyDict_GET_SIZE(dct) == 0) { + /* Fast path */ + return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + } if (s->markers != Py_None) { int has_key; @@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '{')) + if (PyUnicodeWriter_WriteChar(writer, '{')) { goto bail; + } PyObject *current_item_separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } // update item separator with a borrowed reference current_item_separator = separator_indent; - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } } @@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, '}')) + if (PyUnicodeWriter_WriteChar(writer, '}')) { goto bail; + } return 0; bail: @@ -1653,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent) { PyObject *ident = NULL; @@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); + return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); } if (s->markers != Py_None) { @@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '[')) + if (PyUnicodeWriter_WriteChar(writer, '[')) { goto bail; + } PyObject *separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, goto bail; } - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } @@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { @@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, if (s->indent != Py_None) { Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, ']')) + if (PyUnicodeWriter_WriteChar(writer, ']')) { goto bail; + } Py_DECREF(s_fast); return 0; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy