Skip to content

Commit 7b8069b

Browse files
author
Nikita Glukhov
committed
Sort jsonb object values by length
1 parent 4eaf89f commit 7b8069b

File tree

3 files changed

+184
-36
lines changed

3 files changed

+184
-36
lines changed

src/backend/utils/adt/jsonb_op.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ json_hash_internal(FunctionCallInfo fcinfo, bool is_jsonb)
324324
{
325325
/* Rotation is left to JsonbHashScalarValue() */
326326
case WJB_BEGIN_ARRAY:
327-
hash ^= JB_FARRAY;
327+
hash ^= JB_TARRAY;
328328
break;
329329
case WJB_BEGIN_OBJECT:
330-
hash ^= JB_FOBJECT;
330+
hash ^= JB_TOBJECT;
331331
break;
332332
case WJB_KEY:
333333
case WJB_VALUE:
@@ -382,10 +382,10 @@ json_hash_extended_internal(FunctionCallInfo fcinfo, bool is_jsonb)
382382
{
383383
/* Rotation is left to JsonbHashScalarValueExtended() */
384384
case WJB_BEGIN_ARRAY:
385-
hash ^= ((uint64) JB_FARRAY) << 32 | JB_FARRAY;
385+
hash ^= ((uint64) JB_TARRAY) << 32 | JB_TARRAY;
386386
break;
387387
case WJB_BEGIN_OBJECT:
388-
hash ^= ((uint64) JB_FOBJECT) << 32 | JB_FOBJECT;
388+
hash ^= ((uint64) JB_TOBJECT) << 32 | JB_TOBJECT;
389389
break;
390390
case WJB_KEY:
391391
case WJB_VALUE:

src/backend/utils/adt/jsonb_util.c

Lines changed: 162 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "utils/memutils.h"
3232
#include "utils/varlena.h"
3333

34+
#define JSONB_SORTED_VALUES 1
35+
3436
/*
3537
* Maximum number of elements in an array (or key/value pairs in an object).
3638
* This is limited by two things: the size of the JEntry array must fit
@@ -81,6 +83,7 @@ struct JsonbIterator
8183
const JEntry *children; /* JEntrys for child nodes */
8284
/* Data proper. This points to the beginning of the variable-length data */
8385
char *dataProper;
86+
uint32 *kvMap;
8487

8588
/* Current item in buffer (up to nElems) */
8689
int curIndex;
@@ -561,6 +564,8 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
561564
const JEntry *children = container->children;
562565
int count = JsonContainerSize(jsc);
563566
char *baseAddr;
567+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
568+
const uint32 *kvmap;
564569
uint32 stopLow,
565570
stopHigh;
566571

@@ -574,7 +579,16 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
574579
* Binary search the container. Since we know this is an object, account
575580
* for *Pairs* of Jentrys
576581
*/
577-
baseAddr = (char *) (children + count * 2);
582+
if (sorted_values)
583+
{
584+
kvmap = &children[count * 2];
585+
baseAddr = (char *) &kvmap[count];
586+
}
587+
else
588+
{
589+
kvmap = NULL;
590+
baseAddr = (char *) (children + count * 2);
591+
}
578592
stopLow = 0;
579593
stopHigh = count;
580594
while (stopLow < stopHigh)
@@ -595,7 +609,7 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
595609
if (difference == 0)
596610
{
597611
/* Found our key, return corresponding value */
598-
int index = stopMiddle + count;
612+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
599613

600614
if (!res)
601615
res = palloc(sizeof(JsonbValue));
@@ -1197,14 +1211,18 @@ JsonbIteratorNext(JsonIterator **jsit, JsonbValue *val, bool skipNested)
11971211
(*it)->state = JBI_OBJECT_KEY;
11981212

11991213
fillCompressedJsonbValue((*it)->compressed, (*it)->container,
1200-
(*it)->curIndex + (*it)->nElems,
1201-
(*it)->dataProper, (*it)->curValueOffset,
1214+
((*it)->kvMap ? (*it)->kvMap[(*it)->curIndex] : (*it)->curIndex) + (*it)->nElems,
1215+
(*it)->dataProper,
1216+
(*it)->kvMap ?
1217+
getJsonbOffset((*it)->container, (*it)->kvMap[(*it)->curIndex] + (*it)->nElems) :
1218+
(*it)->curValueOffset,
12021219
val);
12031220

12041221
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
12051222
(*it)->children[(*it)->curIndex]);
1206-
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1207-
(*it)->children[(*it)->curIndex + (*it)->nElems]);
1223+
if (!(*it)->kvMap)
1224+
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1225+
(*it)->children[(*it)->curIndex + (*it)->nElems]);
12081226
(*it)->curIndex++;
12091227

12101228
/*
@@ -1256,24 +1274,34 @@ jsonbIteratorInit(JsonContainer *cont, const JsonbContainer *container,
12561274
/* Array starts just after header */
12571275
it->children = container->children;
12581276

1259-
switch (container->header & (JB_FARRAY | JB_FOBJECT))
1277+
switch (container->header & JB_TMASK)
12601278
{
1261-
case JB_FARRAY:
1279+
case JB_TSCALAR:
1280+
it->isScalar = true;
1281+
/* FALLTHROUGH */
1282+
case JB_TARRAY:
12621283
it->dataProper =
12631284
(char *) it->children + it->nElems * sizeof(JEntry);
1264-
it->isScalar = (container->header & JB_FSCALAR) != 0;
12651285
/* This is either a "raw scalar", or an array */
12661286
Assert(!it->isScalar || it->nElems == 1);
12671287

12681288
it->state = JBI_ARRAY_START;
12691289
break;
12701290

1271-
case JB_FOBJECT:
1291+
case JB_TOBJECT:
1292+
it->kvMap = NULL;
12721293
it->dataProper =
12731294
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
12741295
it->state = JBI_OBJECT_START;
12751296
break;
12761297

1298+
case JB_TOBJECT_SORTED:
1299+
it->kvMap = (uint32 *)
1300+
((char *) it->children + it->nElems * sizeof(JEntry) * 2);
1301+
it->dataProper = (char *) &it->kvMap[it->nElems];
1302+
it->state = JBI_OBJECT_START;
1303+
break;
1304+
12771305
default:
12781306
elog(ERROR, "unknown type of jsonb container");
12791307
}
@@ -1877,13 +1905,14 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18771905
* Construct the header Jentry and store it in the beginning of the
18781906
* variable-length payload.
18791907
*/
1880-
header = nElems | JB_FARRAY;
18811908
if (val->val.array.rawScalar)
18821909
{
18831910
Assert(nElems == 1);
18841911
Assert(level == 0);
1885-
header |= JB_FSCALAR;
1912+
header = nElems | JB_TSCALAR;
18861913
}
1914+
else
1915+
header = nElems | JB_TARRAY;
18871916

18881917
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
18891918

@@ -1941,6 +1970,48 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
19411970
*pheader = JENTRY_ISCONTAINER | totallen;
19421971
}
19431972

1973+
static int
1974+
int_cmp(const void *a, const void *b)
1975+
{
1976+
int x = *(const int *) a;
1977+
int y = *(const int *) b;
1978+
1979+
return x == y ? 0 : x > y ? 1 : -1;
1980+
}
1981+
1982+
static int
1983+
estimateJsonbValueSize(const JsonbValue *jbv)
1984+
{
1985+
int size;
1986+
1987+
switch (jbv->type)
1988+
{
1989+
case jbvNull:
1990+
case jbvBool:
1991+
return 0;
1992+
case jbvString:
1993+
return jbv->val.string.len;
1994+
case jbvNumeric:
1995+
return VARSIZE_ANY(jbv->val.numeric);
1996+
case jbvArray:
1997+
size = offsetof(JsonbContainer, children[jbv->val.array.nElems]);
1998+
for (int i = 0; i < jbv->val.array.nElems; i++)
1999+
size += estimateJsonbValueSize(&jbv->val.array.elems[i]);
2000+
return size;
2001+
case jbvObject:
2002+
size = offsetof(JsonbContainer, children[jbv->val.object.nPairs * 2]);
2003+
for (int i = 0; i < jbv->val.object.nPairs; i++)
2004+
{
2005+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].key);
2006+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].value);
2007+
}
2008+
return size;
2009+
default:
2010+
elog(ERROR, "invalid jsonb value type: %d", jbv->type);
2011+
return 0;
2012+
}
2013+
}
2014+
19442015
static void
19452016
convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int level)
19462017
{
@@ -1950,9 +2021,39 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19502021
int totallen;
19512022
uint32 header;
19522023
int nPairs = val->val.object.nPairs;
2024+
int reserved_size;
2025+
bool sorted_values = JSONB_SORTED_VALUES && nPairs > 1;
2026+
struct
2027+
{
2028+
int size;
2029+
int32 index;
2030+
} *values = sorted_values ? palloc(sizeof(*values) * nPairs) : NULL;
19532031

19542032
Assert(nPairs >= 0);
19552033

2034+
if (sorted_values)
2035+
{
2036+
for (i = 0; i < nPairs; i++)
2037+
{
2038+
values[i].index = i;
2039+
values[i].size = estimateJsonbValueSize(&val->val.object.pairs[i].value);
2040+
}
2041+
2042+
qsort(values, nPairs, sizeof(*values), int_cmp);
2043+
2044+
/* check if keys were really moved */
2045+
sorted_values = false;
2046+
2047+
for (i = 0; i < nPairs; i++)
2048+
{
2049+
if (values[i].index != i)
2050+
{
2051+
sorted_values = true;
2052+
break;
2053+
}
2054+
}
2055+
}
2056+
19562057
/* Remember where in the buffer this object starts. */
19572058
base_offset = buffer->len;
19582059

@@ -1963,17 +2064,30 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19632064
* Construct the header Jentry and store it in the beginning of the
19642065
* variable-length payload.
19652066
*/
1966-
header = nPairs | JB_FOBJECT;
2067+
header = nPairs | (sorted_values ? JB_TOBJECT_SORTED : JB_TOBJECT);
19672068
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
19682069

19692070
/* Reserve space for the JEntries of the keys and values. */
1970-
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
2071+
reserved_size = sizeof(JEntry) * nPairs * 2;
2072+
if (sorted_values)
2073+
reserved_size += sizeof(int32) * nPairs;
2074+
2075+
jentry_offset = reserveFromBuffer(buffer, reserved_size);
2076+
2077+
/* Write key-value map */
2078+
if (sorted_values)
2079+
{
2080+
for (i = 0; i < nPairs; i++)
2081+
copyToBuffer(buffer, jentry_offset + sizeof(JEntry) * nPairs * 2 + values[i].index * sizeof(int32),
2082+
&i, sizeof(int32));
2083+
}
19712084

19722085
/*
19732086
* Iterate over the keys, then over the values, since that is the ordering
19742087
* we want in the on-disk representation.
19752088
*/
19762089
totallen = 0;
2090+
19772091
for (i = 0; i < nPairs; i++)
19782092
{
19792093
JsonbPair *pair = &val->val.object.pairs[i];
@@ -2009,9 +2123,11 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20092123
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
20102124
jentry_offset += sizeof(JEntry);
20112125
}
2126+
20122127
for (i = 0; i < nPairs; i++)
20132128
{
2014-
JsonbPair *pair = &val->val.object.pairs[i];
2129+
int val_index = sorted_values ? values[i].index : i;
2130+
JsonbPair *pair = &val->val.object.pairs[val_index];
20152131
int len;
20162132
JEntry meta;
20172133

@@ -2045,6 +2161,9 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20452161
jentry_offset += sizeof(JEntry);
20462162
}
20472163

2164+
if (values)
2165+
pfree(values);
2166+
20482167
/* Total data size is everything we've appended to buffer */
20492168
totallen = buffer->len - base_offset;
20502169

@@ -2339,16 +2458,35 @@ JsonUniquify(Json *json)
23392458
return json;
23402459
}
23412460

2461+
static void
2462+
jsonbInitContainerFromHeader(JsonContainerData *jc, JsonbContainer *jbc)
2463+
{
2464+
jc->size = jbc->header & JB_CMASK;
2465+
switch (jbc->header & JB_TMASK)
2466+
{
2467+
case JB_TOBJECT:
2468+
case JB_TOBJECT_SORTED:
2469+
jc->type = jbvObject;
2470+
break;
2471+
case JB_TARRAY:
2472+
jc->type = jbvArray;
2473+
break;
2474+
case JB_TSCALAR:
2475+
jc->type = jbvArray | jbvScalar;
2476+
break;
2477+
default:
2478+
elog(ERROR, "invalid jsonb container type: %d",
2479+
jbc->header & JB_TMASK);
2480+
}
2481+
}
2482+
23422483
static void
23432484
jsonbInitContainer(JsonContainerData *jc, JsonbContainer *jbc, int len)
23442485
{
23452486
jc->ops = &jsonbContainerOps;
23462487
JsonContainerDataPtr(jc) = jbc;
23472488
jc->len = len;
2348-
jc->size = jbc->header & JB_CMASK;
2349-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2350-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2351-
jbvArray;
2489+
jsonbInitContainerFromHeader(jc, jbc);
23522490
}
23532491

23542492
static void
@@ -2462,10 +2600,7 @@ jsonbzInitContainer(JsonContainerData *jc, CompressedJsonb *cjb, int len)
24622600

24632601
jc->ops = &jsonbzContainerOps;
24642602
jc->len = len;
2465-
jc->size = jbc->header & JB_CMASK;
2466-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2467-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2468-
jbvArray;
2603+
jsonbInitContainerFromHeader(jc, jbc);
24692604
}
24702605

24712606
static JsonbContainer *
@@ -2533,7 +2668,9 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25332668
JEntry *children = container->children;
25342669
int count = container->header & JB_CMASK;
25352670
/* Since this is an object, account for *Pairs* of Jentrys */
2536-
char *base_addr = (char *) (children + count * 2);
2671+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
2672+
char *base_addr = (char *) (children + count * 2) + (sorted_values ? sizeof(uint32) * count : 0);
2673+
uint32 *kvmap = sorted_values ? &container->children[count * 2] : NULL;
25372674
Size base_offset = base_addr - (char *) jb;
25382675
uint32 stopLow = 0,
25392676
stopHigh = count;
@@ -2575,7 +2712,7 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25752712
if (difference == 0)
25762713
{
25772714
/* Found our key, return corresponding value */
2578-
int index = stopMiddle + count;
2715+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
25792716

25802717
return fillCompressedJsonbValue(cjb, container, index, base_addr,
25812718
getJsonbOffset(container, index),

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy