Skip to content

Commit 0a8de93

Browse files
committed
Speed up lexing of long JSON strings
Use optimized linear search when looking ahead for end quotes, backslashes, and non-printable characters. This results in nearly 40% faster JSON parsing on x86-64 when most values are long strings, and all platforms should see some improvement.

Reviewed by Andres Freund and Nathan Bossart.

Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com
Discussion: https://www.postgresql.org/message-id/CAFBsxsESLUyJ5spfOSyPrOvKUEYYNqsBosue9SV1j8ecgNXSKA%40mail.gmail.com
1 parent 0551912 commit 0a8de93

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

src/common/jsonapi.c

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919

2020
#include "common/jsonapi.h"
2121
#include "mb/pg_wchar.h"
22+
#include "port/pg_lfind.h"
2223

2324
#ifndef FRONTEND
2425
#include "miscadmin.h"
@@ -844,7 +845,7 @@ json_lex_string(JsonLexContext *lex)
844845
}
845846
else
846847
{
847-
char *p;
848+
char *p = s;
848849

849850
if (hi_surrogate != -1)
850851
return JSON_UNICODE_LOW_SURROGATE;
@@ -853,11 +854,17 @@ json_lex_string(JsonLexContext *lex)
853854
* Skip to the first byte that requires special handling, so we
854855
* can batch calls to appendBinaryStringInfo.
855856
*/
856-
for (p = s; p < end; p++)
857+
while (p < end - sizeof(Vector8) &&
858+
!pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
859+
!pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
860+
!pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
861+
p += sizeof(Vector8);
862+
863+
for (; p < end; p++)
857864
{
858865
if (*p == '\\' || *p == '"')
859866
break;
860-
else if ((unsigned char) *p < 32)
867+
else if ((unsigned char) *p <= 31)
861868
{
862869
/* Per RFC4627, these characters MUST be escaped. */
863870
/*

src/test/regress/expected/json.out

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,19 @@ LINE 1: SELECT '"\v"'::json;
4242
^
4343
DETAIL: Escape sequence "\v" is invalid.
4444
CONTEXT: JSON data, line 1: "\v...
45+
-- Check fast path for longer strings (at least 16 bytes long)
46+
SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
47+
json
48+
-------------------
49+
"............abc"
50+
(1 row)
51+
52+
SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
53+
json
54+
---------------------
55+
"............abc\n"
56+
(1 row)
57+
4558
-- see json_encoding test for input with unicode escapes
4659
-- Numbers.
4760
SELECT '1'::json; -- OK

src/test/regress/sql/json.sql

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@ SELECT '"abc
77
def"'::json; -- ERROR, unescaped newline in string constant
88
SELECT '"\n\"\\"'::json; -- OK, legal escapes
99
SELECT '"\v"'::json; -- ERROR, not a valid JSON escape
10+
11+
-- Check fast path for longer strings (at least 16 bytes long)
12+
SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
13+
SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
14+
1015
-- see json_encoding test for input with unicode escapes
1116

1217
-- Numbers.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy