Skip to content

Commit cd87628

Browse files
author
Nikita Glukhov
committed
Sort jsonb object values by length
1 parent cc64c7f commit cd87628

File tree

3 files changed

+184
-36
lines changed

3 files changed

+184
-36
lines changed

src/backend/utils/adt/jsonb_op.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ json_hash_internal(FunctionCallInfo fcinfo, bool is_jsonb)
324324
{
325325
/* Rotation is left to JsonbHashScalarValue() */
326326
case WJB_BEGIN_ARRAY:
327-
hash ^= JB_FARRAY;
327+
hash ^= JB_TARRAY;
328328
break;
329329
case WJB_BEGIN_OBJECT:
330-
hash ^= JB_FOBJECT;
330+
hash ^= JB_TOBJECT;
331331
break;
332332
case WJB_KEY:
333333
case WJB_VALUE:
@@ -382,10 +382,10 @@ json_hash_extended_internal(FunctionCallInfo fcinfo, bool is_jsonb)
382382
{
383383
/* Rotation is left to JsonbHashScalarValueExtended() */
384384
case WJB_BEGIN_ARRAY:
385-
hash ^= ((uint64) JB_FARRAY) << 32 | JB_FARRAY;
385+
hash ^= ((uint64) JB_TARRAY) << 32 | JB_TARRAY;
386386
break;
387387
case WJB_BEGIN_OBJECT:
388-
hash ^= ((uint64) JB_FOBJECT) << 32 | JB_FOBJECT;
388+
hash ^= ((uint64) JB_TOBJECT) << 32 | JB_TOBJECT;
389389
break;
390390
case WJB_KEY:
391391
case WJB_VALUE:

src/backend/utils/adt/jsonb_util.c

Lines changed: 162 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "utils/memutils.h"
3232
#include "utils/varlena.h"
3333

34+
#define JSONB_SORTED_VALUES 1
35+
3436
/*
3537
* Maximum number of elements in an array (or key/value pairs in an object).
3638
* This is limited by two things: the size of the JEntry array must fit
@@ -81,6 +83,7 @@ struct JsonbIterator
8183
const JEntry *children; /* JEntrys for child nodes */
8284
/* Data proper. This points to the beginning of the variable-length data */
8385
char *dataProper;
86+
uint32 *kvMap;
8487

8588
/* Current item in buffer (up to nElems) */
8689
int curIndex;
@@ -562,6 +565,8 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
562565
const JEntry *children = container->children;
563566
int count = JsonContainerSize(jsc);
564567
char *baseAddr;
568+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
569+
const uint32 *kvmap;
565570
uint32 stopLow,
566571
stopHigh;
567572

@@ -575,7 +580,16 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
575580
* Binary search the container. Since we know this is an object, account
576581
* for *Pairs* of Jentrys
577582
*/
578-
baseAddr = (char *) (children + count * 2);
583+
if (sorted_values)
584+
{
585+
kvmap = &children[count * 2];
586+
baseAddr = (char *) &kvmap[count];
587+
}
588+
else
589+
{
590+
kvmap = NULL;
591+
baseAddr = (char *) (children + count * 2);
592+
}
579593
stopLow = 0;
580594
stopHigh = count;
581595
while (stopLow < stopHigh)
@@ -596,7 +610,7 @@ getKeyJsonValueFromContainer(JsonContainer *jsc,
596610
if (difference == 0)
597611
{
598612
/* Found our key, return corresponding value */
599-
int index = stopMiddle + count;
613+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
600614

601615
if (!res)
602616
res = palloc(sizeof(JsonbValue));
@@ -1134,14 +1148,18 @@ JsonbIteratorNext(JsonIterator **jsit, JsonbValue *val, bool skipNested)
11341148
(*it)->state = JBI_OBJECT_KEY;
11351149

11361150
fillCompressedJsonbValue((*it)->compressed, (*it)->container,
1137-
(*it)->curIndex + (*it)->nElems,
1138-
(*it)->dataProper, (*it)->curValueOffset,
1151+
((*it)->kvMap ? (*it)->kvMap[(*it)->curIndex] : (*it)->curIndex) + (*it)->nElems,
1152+
(*it)->dataProper,
1153+
(*it)->kvMap ?
1154+
getJsonbOffset((*it)->container, (*it)->kvMap[(*it)->curIndex] + (*it)->nElems) :
1155+
(*it)->curValueOffset,
11391156
val);
11401157

11411158
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
11421159
(*it)->children[(*it)->curIndex]);
1143-
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1144-
(*it)->children[(*it)->curIndex + (*it)->nElems]);
1160+
if (!(*it)->kvMap)
1161+
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1162+
(*it)->children[(*it)->curIndex + (*it)->nElems]);
11451163
(*it)->curIndex++;
11461164

11471165
/*
@@ -1193,24 +1211,34 @@ jsonbIteratorInit(JsonContainer *cont, const JsonbContainer *container,
11931211
/* Array starts just after header */
11941212
it->children = container->children;
11951213

1196-
switch (container->header & (JB_FARRAY | JB_FOBJECT))
1214+
switch (container->header & JB_TMASK)
11971215
{
1198-
case JB_FARRAY:
1216+
case JB_TSCALAR:
1217+
it->isScalar = true;
1218+
/* FALLTHROUGH */
1219+
case JB_TARRAY:
11991220
it->dataProper =
12001221
(char *) it->children + it->nElems * sizeof(JEntry);
1201-
it->isScalar = (container->header & JB_FSCALAR) != 0;
12021222
/* This is either a "raw scalar", or an array */
12031223
Assert(!it->isScalar || it->nElems == 1);
12041224

12051225
it->state = JBI_ARRAY_START;
12061226
break;
12071227

1208-
case JB_FOBJECT:
1228+
case JB_TOBJECT:
1229+
it->kvMap = NULL;
12091230
it->dataProper =
12101231
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
12111232
it->state = JBI_OBJECT_START;
12121233
break;
12131234

1235+
case JB_TOBJECT_SORTED:
1236+
it->kvMap = (uint32 *)
1237+
((char *) it->children + it->nElems * sizeof(JEntry) * 2);
1238+
it->dataProper = (char *) &it->kvMap[it->nElems];
1239+
it->state = JBI_OBJECT_START;
1240+
break;
1241+
12141242
default:
12151243
elog(ERROR, "unknown type of jsonb container");
12161244
}
@@ -1814,13 +1842,14 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18141842
* Construct the header Jentry and store it in the beginning of the
18151843
* variable-length payload.
18161844
*/
1817-
header = nElems | JB_FARRAY;
18181845
if (val->val.array.rawScalar)
18191846
{
18201847
Assert(nElems == 1);
18211848
Assert(level == 0);
1822-
header |= JB_FSCALAR;
1849+
header = nElems | JB_TSCALAR;
18231850
}
1851+
else
1852+
header = nElems | JB_TARRAY;
18241853

18251854
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
18261855

@@ -1878,6 +1907,48 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18781907
*pheader = JENTRY_ISCONTAINER | totallen;
18791908
}
18801909

1910+
static int
1911+
int_cmp(const void *a, const void *b)
1912+
{
1913+
int x = *(const int *) a;
1914+
int y = *(const int *) b;
1915+
1916+
return x == y ? 0 : x > y ? 1 : -1;
1917+
}
1918+
1919+
static int
1920+
estimateJsonbValueSize(const JsonbValue *jbv)
1921+
{
1922+
int size;
1923+
1924+
switch (jbv->type)
1925+
{
1926+
case jbvNull:
1927+
case jbvBool:
1928+
return 0;
1929+
case jbvString:
1930+
return jbv->val.string.len;
1931+
case jbvNumeric:
1932+
return VARSIZE_ANY(jbv->val.numeric);
1933+
case jbvArray:
1934+
size = offsetof(JsonbContainer, children[jbv->val.array.nElems]);
1935+
for (int i = 0; i < jbv->val.array.nElems; i++)
1936+
size += estimateJsonbValueSize(&jbv->val.array.elems[i]);
1937+
return size;
1938+
case jbvObject:
1939+
size = offsetof(JsonbContainer, children[jbv->val.object.nPairs * 2]);
1940+
for (int i = 0; i < jbv->val.object.nPairs; i++)
1941+
{
1942+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].key);
1943+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].value);
1944+
}
1945+
return size;
1946+
default:
1947+
elog(ERROR, "invalid jsonb value type: %d", jbv->type);
1948+
return 0;
1949+
}
1950+
}
1951+
18811952
static void
18821953
convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int level)
18831954
{
@@ -1887,9 +1958,39 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
18871958
int totallen;
18881959
uint32 header;
18891960
int nPairs = val->val.object.nPairs;
1961+
int reserved_size;
1962+
bool sorted_values = JSONB_SORTED_VALUES && nPairs > 1;
1963+
struct
1964+
{
1965+
int size;
1966+
int32 index;
1967+
} *values = sorted_values ? palloc(sizeof(*values) * nPairs) : NULL;
18901968

18911969
Assert(nPairs >= 0);
18921970

1971+
if (sorted_values)
1972+
{
1973+
for (i = 0; i < nPairs; i++)
1974+
{
1975+
values[i].index = i;
1976+
values[i].size = estimateJsonbValueSize(&val->val.object.pairs[i].value);
1977+
}
1978+
1979+
qsort(values, nPairs, sizeof(*values), int_cmp);
1980+
1981+
/* check if keys were really moved */
1982+
sorted_values = false;
1983+
1984+
for (i = 0; i < nPairs; i++)
1985+
{
1986+
if (values[i].index != i)
1987+
{
1988+
sorted_values = true;
1989+
break;
1990+
}
1991+
}
1992+
}
1993+
18931994
/* Remember where in the buffer this object starts. */
18941995
base_offset = buffer->len;
18951996

@@ -1900,17 +2001,30 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19002001
* Construct the header Jentry and store it in the beginning of the
19012002
* variable-length payload.
19022003
*/
1903-
header = nPairs | JB_FOBJECT;
2004+
header = nPairs | (sorted_values ? JB_TOBJECT_SORTED : JB_TOBJECT);
19042005
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
19052006

19062007
/* Reserve space for the JEntries of the keys and values. */
1907-
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
2008+
reserved_size = sizeof(JEntry) * nPairs * 2;
2009+
if (sorted_values)
2010+
reserved_size += sizeof(int32) * nPairs;
2011+
2012+
jentry_offset = reserveFromBuffer(buffer, reserved_size);
2013+
2014+
/* Write key-value map */
2015+
if (sorted_values)
2016+
{
2017+
for (i = 0; i < nPairs; i++)
2018+
copyToBuffer(buffer, jentry_offset + sizeof(JEntry) * nPairs * 2 + values[i].index * sizeof(int32),
2019+
&i, sizeof(int32));
2020+
}
19082021

19092022
/*
19102023
* Iterate over the keys, then over the values, since that is the ordering
19112024
* we want in the on-disk representation.
19122025
*/
19132026
totallen = 0;
2027+
19142028
for (i = 0; i < nPairs; i++)
19152029
{
19162030
JsonbPair *pair = &val->val.object.pairs[i];
@@ -1946,9 +2060,11 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19462060
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
19472061
jentry_offset += sizeof(JEntry);
19482062
}
2063+
19492064
for (i = 0; i < nPairs; i++)
19502065
{
1951-
JsonbPair *pair = &val->val.object.pairs[i];
2066+
int val_index = sorted_values ? values[i].index : i;
2067+
JsonbPair *pair = &val->val.object.pairs[val_index];
19522068
int len;
19532069
JEntry meta;
19542070

@@ -1982,6 +2098,9 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19822098
jentry_offset += sizeof(JEntry);
19832099
}
19842100

2101+
if (values)
2102+
pfree(values);
2103+
19852104
/* Total data size is everything we've appended to buffer */
19862105
totallen = buffer->len - base_offset;
19872106

@@ -2276,16 +2395,35 @@ JsonUniquify(Json *json)
22762395
return json;
22772396
}
22782397

2398+
static void
2399+
jsonbInitContainerFromHeader(JsonContainerData *jc, JsonbContainer *jbc)
2400+
{
2401+
jc->size = jbc->header & JB_CMASK;
2402+
switch (jbc->header & JB_TMASK)
2403+
{
2404+
case JB_TOBJECT:
2405+
case JB_TOBJECT_SORTED:
2406+
jc->type = jbvObject;
2407+
break;
2408+
case JB_TARRAY:
2409+
jc->type = jbvArray;
2410+
break;
2411+
case JB_TSCALAR:
2412+
jc->type = jbvArray | jbvScalar;
2413+
break;
2414+
default:
2415+
elog(ERROR, "invalid jsonb container type: %d",
2416+
jbc->header & JB_TMASK);
2417+
}
2418+
}
2419+
22792420
static void
22802421
jsonbInitContainer(JsonContainerData *jc, JsonbContainer *jbc, int len)
22812422
{
22822423
jc->ops = &jsonbContainerOps;
22832424
JsonContainerDataPtr(jc) = jbc;
22842425
jc->len = len;
2285-
jc->size = jbc->header & JB_CMASK;
2286-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2287-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2288-
jbvArray;
2426+
jsonbInitContainerFromHeader(jc, jbc);
22892427
}
22902428

22912429
static void
@@ -2399,10 +2537,7 @@ jsonbzInitContainer(JsonContainerData *jc, CompressedJsonb *cjb, int len)
23992537

24002538
jc->ops = &jsonbzContainerOps;
24012539
jc->len = len;
2402-
jc->size = jbc->header & JB_CMASK;
2403-
jc->type = jbc->header & JB_FOBJECT ? jbvObject :
2404-
jbc->header & JB_FSCALAR ? jbvArray | jbvScalar :
2405-
jbvArray;
2540+
jsonbInitContainerFromHeader(jc, jbc);
24062541
}
24072542

24082543
static JsonbContainer *
@@ -2470,7 +2605,9 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
24702605
JEntry *children = container->children;
24712606
int count = container->header & JB_CMASK;
24722607
/* Since this is an object, account for *Pairs* of Jentrys */
2473-
char *base_addr = (char *) (children + count * 2);
2608+
bool sorted_values = (container->header & JB_TMASK) == JB_TOBJECT_SORTED;
2609+
char *base_addr = (char *) (children + count * 2) + (sorted_values ? sizeof(uint32) * count : 0);
2610+
uint32 *kvmap = sorted_values ? &container->children[count * 2] : NULL;
24742611
Size base_offset = base_addr - (char *) jb;
24752612
uint32 stopLow = 0,
24762613
stopHigh = count;
@@ -2512,7 +2649,7 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25122649
if (difference == 0)
25132650
{
25142651
/* Found our key, return corresponding value */
2515-
int index = stopMiddle + count;
2652+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
25162653

25172654
return fillCompressedJsonbValue(cjb, container, index, base_addr,
25182655
getJsonbOffset(container, index),

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy