Skip to content

Commit c2f1010

Browse files
committed
json: Optimize escaping string in Encoder
1 parent 4e294f6 commit c2f1010

File tree

1 file changed

+115
-33
lines changed

1 file changed

+115
-33
lines changed

Modules/_json.c

Lines changed: 115 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
5151
char sort_keys;
5252
char skipkeys;
5353
int allow_nan;
54-
PyCFunction fast_encode;
54+
int (*fast_encode)(PyUnicodeWriter *, PyObject*);
5555
} PyEncoderObject;
5656

5757
#define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op))
@@ -102,8 +102,8 @@ static PyObject *
102102
_encoded_const(PyObject *obj);
103103
static void
104104
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
105-
static PyObject *
106-
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
105+
static int
106+
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj);
107107
static PyObject *
108108
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
109109

@@ -303,6 +303,89 @@ escape_unicode(PyObject *pystr)
303303
return rval;
304304
}
305305

306+
// Take a PyUnicode pystr and write an ASCII-only escaped string to writer.
//
// Output is an opening double quote, the (escaped) contents of pystr, and a
// closing double quote.  Code points accepted by S_CHAR() are not written one
// at a time: they are only counted in copy_len and later copied out of pystr
// as one substring.  Every other code point is expanded into buf by
// ascii_escape_unichar() and written from there.
//
// Returns as soon as any PyUnicodeWriter_* call yields a non-zero result,
// passing that result through; otherwise returns the result of the final
// PyUnicodeWriter_WriteChar() call.
// NOTE(review): S_CHAR() and ascii_escape_unichar() are defined outside this
// view; their exact character classes / output format should be confirmed
// there.
307+
static int
308+
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
309+
{
310+
Py_ssize_t i;
311+
Py_ssize_t input_chars;
312+
Py_ssize_t chars;
313+
// Length of the pending run of not-yet-written characters that ends just
// before index i.
Py_ssize_t copy_len = 0;
314+
const void *input;
315+
int kind;
316+
int ret;
317+
// Scratch space for a single escape sequence.
// NOTE(review): presumably 12 bytes is the longest sequence
// ascii_escape_unichar() can emit (an escaped surrogate pair) -- confirm
// against that helper.
unsigned char buf[12];
318+
319+
input_chars = PyUnicode_GET_LENGTH(pystr);
320+
input = PyUnicode_DATA(pystr);
321+
kind = PyUnicode_KIND(pystr);
322+
323+
ret = PyUnicodeWriter_WriteChar(writer, '"');
324+
if (ret) return ret;
325+
326+
for (i = 0; i < input_chars; i++) {
327+
Py_UCS4 c = PyUnicode_READ(kind, input, i);
328+
if (S_CHAR(c)) {
329+
// No write yet: just extend the pending run.
copy_len++;
330+
}
331+
else {
332+
// Flush the pending run [i-copy_len, i) before emitting the escape.
// copy_len can be 0 here, in which case the range is empty.
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
333+
if (ret) return ret;
334+
copy_len = 0;
335+
336+
// The return value is used below as the number of bytes to write from buf.
// NOTE(review): the trailing 0 is presumably the start offset into buf --
// confirm against ascii_escape_unichar().
chars = ascii_escape_unichar(c, buf, 0);
337+
ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
338+
if (ret) return ret;
339+
}
340+
}
341+
342+
// The loop only exits normally with i == input_chars, so this flushes the
// run that reaches the end of the string (possibly empty).
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
343+
if (ret) return ret;
344+
345+
return PyUnicodeWriter_WriteChar(writer, '"');
346+
}
347+
348+
// Take a PyUnicode pystr and write an escaped string to writer.
//
// Output is an opening double quote, the (escaped) contents of pystr, and a
// closing double quote.  Only code points <= 0x1f, the backslash and the
// double quote are escaped; every other code point (including non-ASCII) is
// copied from pystr unchanged.  Unescaped code points are not written one at
// a time: they are only counted in copy_len and later copied out of pystr as
// one substring.
//
// Returns as soon as any PyUnicodeWriter_* call yields a non-zero result,
// passing that result through; otherwise returns the result of the final
// PyUnicodeWriter_WriteChar() call.
// NOTE(review): ascii_escape_unichar() is defined outside this view; its
// output format should be confirmed there.
349+
static int
350+
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
351+
{
352+
Py_ssize_t i;
353+
Py_ssize_t input_chars;
354+
Py_ssize_t chars;
355+
// Length of the pending run of not-yet-written characters that ends just
// before index i.
Py_ssize_t copy_len = 0;
356+
const void *input;
357+
int kind;
358+
int ret;
359+
// Scratch space for a single escape sequence produced by
// ascii_escape_unichar().
unsigned char buf[12];
360+
361+
input_chars = PyUnicode_GET_LENGTH(pystr);
362+
input = PyUnicode_DATA(pystr);
363+
kind = PyUnicode_KIND(pystr);
364+
365+
ret = PyUnicodeWriter_WriteChar(writer, '"');
366+
if (ret) return ret;
367+
368+
for (i = 0; i < input_chars; i++) {
369+
Py_UCS4 c = PyUnicode_READ(kind, input, i);
370+
if (c <= 0x1f || c == '\\' || c == '"') {
371+
// Flush the pending run [i-copy_len, i) before emitting the escape.
// copy_len can be 0 here, in which case the range is empty.
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
372+
if (ret) return ret;
373+
copy_len = 0;
374+
375+
// The return value is used below as the number of bytes to write from buf.
// NOTE(review): the trailing 0 is presumably the start offset into buf --
// confirm against ascii_escape_unichar().
chars = ascii_escape_unichar(c, buf, 0);
376+
ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
377+
if (ret) return ret;
378+
}
379+
else {
380+
// No write yet: just extend the pending run.
copy_len++;
381+
}
382+
}
383+
384+
// The loop only exits normally with i == input_chars, so this flushes the
// run that reaches the end of the string (possibly empty).
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
385+
if (ret) return ret;
386+
return PyUnicodeWriter_WriteChar(writer, '"');
387+
}
388+
306389
static void
307390
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
308391
{
@@ -1255,8 +1338,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12551338

12561339
if (PyCFunction_Check(s->encoder)) {
12571340
PyCFunction f = PyCFunction_GetFunction(s->encoder);
1258-
if (f == py_encode_basestring_ascii || f == py_encode_basestring) {
1259-
s->fast_encode = f;
1341+
if (f == py_encode_basestring_ascii){
1342+
s->fast_encode = write_escaped_ascii;
1343+
}
1344+
else if (f == py_encode_basestring) {
1345+
s->fast_encode = write_escaped_unicode;
12601346
}
12611347
}
12621348

@@ -1437,33 +1523,35 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
14371523
return PyFloat_Type.tp_repr(obj);
14381524
}
14391525

1440-
static PyObject *
1441-
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1526+
static int
1527+
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
14421528
{
1443-
/* Return the JSON representation of a string */
1444-
PyObject *encoded;
1529+
/* Append stolen and then decrement its reference count */
1530+
int rval = PyUnicodeWriter_WriteStr(writer, stolen);
1531+
Py_DECREF(stolen);
1532+
return rval;
1533+
}
14451534

1535+
static int
1536+
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
1537+
{
14461538
if (s->fast_encode) {
1447-
return s->fast_encode(NULL, obj);
1539+
return s->fast_encode(writer, obj);
1540+
}
1541+
1542+
/* Slow path: obtain the JSON representation of a string by calling s->encoder, then append it to writer */
1543+
PyObject *encoded = PyObject_CallOneArg(s->encoder, obj);
1544+
if (encoded == NULL) {
1545+
return -1;
14481546
}
1449-
encoded = PyObject_CallOneArg(s->encoder, obj);
14501547
if (encoded != NULL && !PyUnicode_Check(encoded)) {
14511548
PyErr_Format(PyExc_TypeError,
14521549
"encoder() must return a string, not %.80s",
14531550
Py_TYPE(encoded)->tp_name);
14541551
Py_DECREF(encoded);
1455-
return NULL;
1552+
return -1;
14561553
}
1457-
return encoded;
1458-
}
1459-
1460-
static int
1461-
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
1462-
{
1463-
/* Append stolen and then decrement its reference count */
1464-
int rval = PyUnicodeWriter_WriteStr(writer, stolen);
1465-
Py_DECREF(stolen);
1466-
return rval;
1554+
return _steal_accumulate(writer, encoded);
14671555
}
14681556

14691557
static int
@@ -1485,10 +1573,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14851573
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
14861574
}
14871575
else if (PyUnicode_Check(obj)) {
1488-
PyObject *encoded = encoder_encode_string(s, obj);
1489-
if (encoded == NULL)
1490-
return -1;
1491-
return _steal_accumulate(writer, encoded);
1576+
return encoder_write_string(s, writer, obj);
14921577
}
14931578
else if (PyLong_Check(obj)) {
14941579
if (PyLong_CheckExact(obj)) {
@@ -1577,7 +1662,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
15771662
PyObject *item_separator)
15781663
{
15791664
PyObject *keystr = NULL;
1580-
PyObject *encoded;
1665+
int rv;
15811666

15821667
if (PyUnicode_Check(key)) {
15831668
keystr = Py_NewRef(key);
@@ -1617,15 +1702,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
16171702
}
16181703
}
16191704

1620-
encoded = encoder_encode_string(s, keystr);
1705+
rv = encoder_write_string(s, writer, keystr);
16211706
Py_DECREF(keystr);
1622-
if (encoded == NULL) {
1623-
return -1;
1707+
if (rv != 0) {
1708+
return rv;
16241709
}
16251710

1626-
if (_steal_accumulate(writer, encoded) < 0) {
1627-
return -1;
1628-
}
16291711
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
16301712
return -1;
16311713
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy