Skip to content

Commit c60c9ba

Browse files
committed
Convert json_in and jsonb_in to report errors softly.
This requires a bit of further infrastructure-extension to allow trapping errors reported by numeric_in and pg_unicode_to_server, but otherwise it's pretty straightforward.

In the case of jsonb_in, we are only capturing errors reported during the initial "parse" phase. The value-construction phase (JsonbValueToJsonb) can also throw errors if assorted implementation limits are exceeded. We should improve that, but it seems like a separable project.

Andrew Dunstan and Tom Lane

Discussion: https://postgr.es/m/3bac9841-fe07-713d-fa42-606c225567d6@dunslane.net
1 parent: 50428a3 · commit: c60c9ba

File tree

17 files changed

+282
-46
lines changed

17 files changed

+282
-46
lines changed

src/backend/utils/adt/json.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ json_in(PG_FUNCTION_ARGS)
8181

8282
/* validate it */
8383
lex = makeJsonLexContext(result, false);
84-
pg_parse_json_or_ereport(lex, &nullSemAction);
84+
if (!pg_parse_json_or_errsave(lex, &nullSemAction, fcinfo->context))
85+
PG_RETURN_NULL();
8586

86-
/* Internal representation is the same as text, for now */
87+
/* Internal representation is the same as text */
8788
PG_RETURN_TEXT_P(result);
8889
}
8990

@@ -1337,7 +1338,7 @@ json_typeof(PG_FUNCTION_ARGS)
13371338
/* Lex exactly one token from the input and check its type. */
13381339
result = json_lex(lex);
13391340
if (result != JSON_SUCCESS)
1340-
json_ereport_error(result, lex);
1341+
json_errsave_error(result, lex, NULL);
13411342
tok = lex->token_type;
13421343
switch (tok)
13431344
{

src/backend/utils/adt/jsonb.c

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ typedef struct JsonbInState
3333
{
3434
JsonbParseState *parseState;
3535
JsonbValue *res;
36+
Node *escontext;
3637
} JsonbInState;
3738

3839
/* unlike with json categories, we need to treat json and jsonb differently */
@@ -61,8 +62,8 @@ typedef struct JsonbAggState
6162
Oid val_output_func;
6263
} JsonbAggState;
6364

64-
static inline Datum jsonb_from_cstring(char *json, int len);
65-
static size_t checkStringLen(size_t len);
65+
static inline Datum jsonb_from_cstring(char *json, int len, Node *escontext);
66+
static bool checkStringLen(size_t len, Node *escontext);
6667
static JsonParseErrorType jsonb_in_object_start(void *pstate);
6768
static JsonParseErrorType jsonb_in_object_end(void *pstate);
6869
static JsonParseErrorType jsonb_in_array_start(void *pstate);
@@ -98,7 +99,7 @@ jsonb_in(PG_FUNCTION_ARGS)
9899
{
99100
char *json = PG_GETARG_CSTRING(0);
100101

101-
return jsonb_from_cstring(json, strlen(json));
102+
return jsonb_from_cstring(json, strlen(json), fcinfo->context);
102103
}
103104

104105
/*
@@ -122,7 +123,7 @@ jsonb_recv(PG_FUNCTION_ARGS)
122123
else
123124
elog(ERROR, "unsupported jsonb version number %d", version);
124125

125-
return jsonb_from_cstring(str, nbytes);
126+
return jsonb_from_cstring(str, nbytes, NULL);
126127
}
127128

128129
/*
@@ -251,9 +252,12 @@ jsonb_typeof(PG_FUNCTION_ARGS)
251252
* Turns json string into a jsonb Datum.
252253
*
253254
* Uses the json parser (with hooks) to construct a jsonb.
255+
*
256+
* If escontext points to an ErrorSaveContext, errors are reported there
257+
* instead of being thrown.
254258
*/
255259
static inline Datum
256-
jsonb_from_cstring(char *json, int len)
260+
jsonb_from_cstring(char *json, int len, Node *escontext)
257261
{
258262
JsonLexContext *lex;
259263
JsonbInState state;
@@ -263,6 +267,7 @@ jsonb_from_cstring(char *json, int len)
263267
memset(&sem, 0, sizeof(sem));
264268
lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true);
265269

270+
state.escontext = escontext;
266271
sem.semstate = (void *) &state;
267272

268273
sem.object_start = jsonb_in_object_start;
@@ -272,23 +277,24 @@ jsonb_from_cstring(char *json, int len)
272277
sem.scalar = jsonb_in_scalar;
273278
sem.object_field_start = jsonb_in_object_field_start;
274279

275-
pg_parse_json_or_ereport(lex, &sem);
280+
if (!pg_parse_json_or_errsave(lex, &sem, escontext))
281+
return (Datum) 0;
276282

277283
/* after parsing, the item member has the composed jsonb structure */
278284
PG_RETURN_POINTER(JsonbValueToJsonb(state.res));
279285
}
280286

281-
static size_t
282-
checkStringLen(size_t len)
287+
static bool
288+
checkStringLen(size_t len, Node *escontext)
283289
{
284290
if (len > JENTRY_OFFLENMASK)
285-
ereport(ERROR,
291+
ereturn(escontext, false,
286292
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
287293
errmsg("string too long to represent as jsonb string"),
288294
errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.",
289295
JENTRY_OFFLENMASK)));
290296

291-
return len;
297+
return true;
292298
}
293299

294300
static JsonParseErrorType
@@ -339,7 +345,9 @@ jsonb_in_object_field_start(void *pstate, char *fname, bool isnull)
339345

340346
Assert(fname != NULL);
341347
v.type = jbvString;
342-
v.val.string.len = checkStringLen(strlen(fname));
348+
v.val.string.len = strlen(fname);
349+
if (!checkStringLen(v.val.string.len, _state->escontext))
350+
return JSON_SEM_ACTION_FAILED;
343351
v.val.string.val = fname;
344352

345353
_state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v);
@@ -390,7 +398,9 @@ jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
390398
case JSON_TOKEN_STRING:
391399
Assert(token != NULL);
392400
v.type = jbvString;
393-
v.val.string.len = checkStringLen(strlen(token));
401+
v.val.string.len = strlen(token);
402+
if (!checkStringLen(v.val.string.len, _state->escontext))
403+
return JSON_SEM_ACTION_FAILED;
394404
v.val.string.val = token;
395405
break;
396406
case JSON_TOKEN_NUMBER:
@@ -401,10 +411,11 @@ jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
401411
*/
402412
Assert(token != NULL);
403413
v.type = jbvNumeric;
404-
numd = DirectFunctionCall3(numeric_in,
405-
CStringGetDatum(token),
406-
ObjectIdGetDatum(InvalidOid),
407-
Int32GetDatum(-1));
414+
if (!DirectInputFunctionCallSafe(numeric_in, token,
415+
InvalidOid, -1,
416+
_state->escontext,
417+
&numd))
418+
return JSON_SEM_ACTION_FAILED;
408419
v.val.numeric = DatumGetNumeric(numd);
409420
break;
410421
case JSON_TOKEN_TRUE:
@@ -738,6 +749,9 @@ jsonb_categorize_type(Oid typoid,
738749
*
739750
* If key_scalar is true, the value is stored as a key, so insist
740751
* it's of an acceptable type, and force it to be a jbvString.
752+
*
753+
* Note: currently, we assume that result->escontext is NULL and errors
754+
* will be thrown.
741755
*/
742756
static void
743757
datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
@@ -910,7 +924,8 @@ datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
910924
default:
911925
outputstr = OidOutputFunctionCall(outfuncoid, val);
912926
jb.type = jbvString;
913-
jb.val.string.len = checkStringLen(strlen(outputstr));
927+
jb.val.string.len = strlen(outputstr);
928+
(void) checkStringLen(jb.val.string.len, NULL);
914929
jb.val.string.val = outputstr;
915930
break;
916931
}
@@ -1648,6 +1663,7 @@ jsonb_agg_finalfn(PG_FUNCTION_ARGS)
16481663
* shallow clone is sufficient as we aren't going to change any of the
16491664
* values, just add the final array end marker.
16501665
*/
1666+
memset(&result, 0, sizeof(JsonbInState));
16511667

16521668
result.parseState = clone_parse_state(arg->res->parseState);
16531669

@@ -1880,6 +1896,7 @@ jsonb_object_agg_finalfn(PG_FUNCTION_ARGS)
18801896
* going to change any of the values, just add the final object end
18811897
* marker.
18821898
*/
1899+
memset(&result, 0, sizeof(JsonbInState));
18831900

18841901
result.parseState = clone_parse_state(arg->res->parseState);
18851902

src/backend/utils/adt/jsonfuncs.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "lib/stringinfo.h"
2626
#include "mb/pg_wchar.h"
2727
#include "miscadmin.h"
28+
#include "nodes/miscnodes.h"
2829
#include "utils/array.h"
2930
#include "utils/builtins.h"
3031
#include "utils/fmgroids.h"
@@ -490,21 +491,31 @@ static JsonParseErrorType transform_string_values_object_field_start(void *state
490491
static JsonParseErrorType transform_string_values_array_element_start(void *state, bool isnull);
491492
static JsonParseErrorType transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
492493

494+
493495
/*
494-
* pg_parse_json_or_ereport
496+
* pg_parse_json_or_errsave
495497
*
496498
* This function is like pg_parse_json, except that it does not return a
497499
* JsonParseErrorType. Instead, in case of any failure, this function will
500+
* save error data into *escontext if that's an ErrorSaveContext, otherwise
498501
* ereport(ERROR).
502+
*
503+
* Returns a boolean indicating success or failure (failure will only be
504+
* returned when escontext is an ErrorSaveContext).
499505
*/
500-
void
501-
pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem)
506+
bool
507+
pg_parse_json_or_errsave(JsonLexContext *lex, JsonSemAction *sem,
508+
Node *escontext)
502509
{
503510
JsonParseErrorType result;
504511

505512
result = pg_parse_json(lex, sem);
506513
if (result != JSON_SUCCESS)
507-
json_ereport_error(result, lex);
514+
{
515+
json_errsave_error(result, lex, escontext);
516+
return false;
517+
}
518+
return true;
508519
}
509520

510521
/*
@@ -608,17 +619,25 @@ jsonb_object_keys(PG_FUNCTION_ARGS)
608619
* Report a JSON error.
609620
*/
610621
void
611-
json_ereport_error(JsonParseErrorType error, JsonLexContext *lex)
622+
json_errsave_error(JsonParseErrorType error, JsonLexContext *lex,
623+
Node *escontext)
612624
{
613625
if (error == JSON_UNICODE_HIGH_ESCAPE ||
626+
error == JSON_UNICODE_UNTRANSLATABLE ||
614627
error == JSON_UNICODE_CODE_POINT_ZERO)
615-
ereport(ERROR,
628+
errsave(escontext,
616629
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
617630
errmsg("unsupported Unicode escape sequence"),
618631
errdetail_internal("%s", json_errdetail(error, lex)),
619632
report_json_context(lex)));
633+
else if (error == JSON_SEM_ACTION_FAILED)
634+
{
635+
/* semantic action function had better have reported something */
636+
if (!SOFT_ERROR_OCCURRED(escontext))
637+
elog(ERROR, "JSON semantic action function did not provide error information");
638+
}
620639
else
621-
ereport(ERROR,
640+
errsave(escontext,
622641
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
623642
errmsg("invalid input syntax for type %s", "json"),
624643
errdetail_internal("%s", json_errdetail(error, lex)),
@@ -1274,7 +1293,7 @@ get_array_start(void *state)
12741293

12751294
error = json_count_array_elements(_state->lex, &nelements);
12761295
if (error != JSON_SUCCESS)
1277-
json_ereport_error(error, _state->lex);
1296+
json_errsave_error(error, _state->lex, NULL);
12781297

12791298
if (-_state->path_indexes[lex_level] <= nelements)
12801299
_state->path_indexes[lex_level] += nelements;

src/backend/utils/fmgr/fmgr.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,6 +1614,51 @@ InputFunctionCallSafe(FmgrInfo *flinfo, char *str,
16141614
return true;
16151615
}
16161616

1617+
/*
1618+
* Call a directly-named datatype input function, with non-exception
1619+
* handling of "soft" errors.
1620+
*
1621+
* This is like InputFunctionCallSafe, except that it is given a direct
1622+
* pointer to the C function to call. We assume that that function is
1623+
* strict. Also, the function cannot be one that needs to
1624+
* look at FmgrInfo, since there won't be any.
1625+
*/
1626+
bool
1627+
DirectInputFunctionCallSafe(PGFunction func, char *str,
1628+
Oid typioparam, int32 typmod,
1629+
fmNodePtr escontext,
1630+
Datum *result)
1631+
{
1632+
LOCAL_FCINFO(fcinfo, 3);
1633+
1634+
if (str == NULL)
1635+
{
1636+
*result = (Datum) 0; /* just return null result */
1637+
return true;
1638+
}
1639+
1640+
InitFunctionCallInfoData(*fcinfo, NULL, 3, InvalidOid, escontext, NULL);
1641+
1642+
fcinfo->args[0].value = CStringGetDatum(str);
1643+
fcinfo->args[0].isnull = false;
1644+
fcinfo->args[1].value = ObjectIdGetDatum(typioparam);
1645+
fcinfo->args[1].isnull = false;
1646+
fcinfo->args[2].value = Int32GetDatum(typmod);
1647+
fcinfo->args[2].isnull = false;
1648+
1649+
*result = (*func) (fcinfo);
1650+
1651+
/* Result value is garbage, and could be null, if an error was reported */
1652+
if (SOFT_ERROR_OCCURRED(escontext))
1653+
return false;
1654+
1655+
/* Otherwise, shouldn't get null result */
1656+
if (fcinfo->isnull)
1657+
elog(ERROR, "input function %p returned NULL", (void *) func);
1658+
1659+
return true;
1660+
}
1661+
16171662
/*
16181663
* Call a previously-looked-up datatype output function.
16191664
*

src/backend/utils/mb/mbutils.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,63 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
916916
BoolGetDatum(false));
917917
}
918918

919+
/*
920+
* Convert a single Unicode code point into a string in the server encoding.
921+
*
922+
* Same as pg_unicode_to_server(), except that we don't throw errors,
923+
* but simply return false on conversion failure.
924+
*/
925+
bool
926+
pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
927+
{
928+
unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
929+
int c_as_utf8_len;
930+
int converted_len;
931+
int server_encoding;
932+
933+
/* Fail if invalid Unicode code point */
934+
if (!is_valid_unicode_codepoint(c))
935+
return false;
936+
937+
/* Otherwise, if it's in ASCII range, conversion is trivial */
938+
if (c <= 0x7F)
939+
{
940+
s[0] = (unsigned char) c;
941+
s[1] = '\0';
942+
return true;
943+
}
944+
945+
/* If the server encoding is UTF-8, we just need to reformat the code */
946+
server_encoding = GetDatabaseEncoding();
947+
if (server_encoding == PG_UTF8)
948+
{
949+
unicode_to_utf8(c, s);
950+
s[pg_utf_mblen(s)] = '\0';
951+
return true;
952+
}
953+
954+
/* For all other cases, we must have a conversion function available */
955+
if (Utf8ToServerConvProc == NULL)
956+
return false;
957+
958+
/* Construct UTF-8 source string */
959+
unicode_to_utf8(c, c_as_utf8);
960+
c_as_utf8_len = pg_utf_mblen(c_as_utf8);
961+
c_as_utf8[c_as_utf8_len] = '\0';
962+
963+
/* Convert, but without throwing error if we can't */
964+
converted_len = DatumGetInt32(FunctionCall6(Utf8ToServerConvProc,
965+
Int32GetDatum(PG_UTF8),
966+
Int32GetDatum(server_encoding),
967+
CStringGetDatum((char *) c_as_utf8),
968+
CStringGetDatum((char *) s),
969+
Int32GetDatum(c_as_utf8_len),
970+
BoolGetDatum(true)));
971+
972+
/* Conversion was successful iff it consumed the whole input */
973+
return (converted_len == c_as_utf8_len);
974+
}
975+
919976

920977
/* convert a multibyte string to a wchar */
921978
int

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy