Skip to content

Commit 5df33e2

Browse files
author
Nikita Glukhov
committed
Sort jsonb object values by length
1 parent 8c8ced1 commit 5df33e2

File tree

2 files changed

+173
-29
lines changed

2 files changed

+173
-29
lines changed

src/backend/utils/adt/jsonb_util.c

Lines changed: 164 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include "utils/memutils.h"
3131
#include "utils/varlena.h"
3232

33+
#define JSONB_SORTED_VALUES 1
34+
3335
/*
3436
* Maximum number of elements in an array (or key/value pairs in an object).
3537
* This is limited by two things: the size of the JEntry array must fit
@@ -82,6 +84,7 @@ typedef struct jsonbIterator
8284
const JEntry *children; /* JEntrys for child nodes */
8385
/* Data proper. This points to the beginning of the variable-length data */
8486
char *dataProper;
87+
uint32 *kvMap;
8588

8689
/* Current item in buffer (up to nElems) */
8790
int curIndex;
@@ -556,6 +559,8 @@ jsonbFindKeyInObject(JsonContainer *jsc, const char *keyVal, int keyLen,
556559
const JEntry *children = container->children;
557560
int count = JsonContainerSize(jsc);
558561
char *baseAddr;
562+
bool sorted_values = (container->header & JBC_TMASK) == JBC_TOBJECT_SORTED;
563+
const uint32 *kvmap;
559564
uint32 stopLow,
560565
stopHigh;
561566

@@ -569,7 +574,16 @@ jsonbFindKeyInObject(JsonContainer *jsc, const char *keyVal, int keyLen,
569574
* Binary search the container. Since we know this is an object, account
570575
* for *Pairs* of Jentrys
571576
*/
572-
baseAddr = (char *) (children + count * 2);
577+
if (sorted_values)
578+
{
579+
kvmap = &children[count * 2];
580+
baseAddr = (char *) &kvmap[count];
581+
}
582+
else
583+
{
584+
kvmap = NULL;
585+
baseAddr = (char *) (children + count * 2);
586+
}
573587
stopLow = 0;
574588
stopHigh = count;
575589
while (stopLow < stopHigh)
@@ -590,7 +604,7 @@ jsonbFindKeyInObject(JsonContainer *jsc, const char *keyVal, int keyLen,
590604
if (difference == 0)
591605
{
592606
/* Found our key, return corresponding value */
593-
int index = stopMiddle + count;
607+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
594608

595609
if (!res)
596610
res = palloc(sizeof(JsonbValue));
@@ -1203,14 +1217,18 @@ jsonbIteratorNext(JsonIterator **jsit, JsonbValue *val, bool skipNested)
12031217
(*it)->state = JBI_OBJECT_KEY;
12041218

12051219
fillCompressedJsonbValue((*it)->compressed, (*it)->container,
1206-
(*it)->curIndex + (*it)->nElems,
1207-
(*it)->dataProper, (*it)->curValueOffset,
1220+
((*it)->kvMap ? (*it)->kvMap[(*it)->curIndex] : (*it)->curIndex) + (*it)->nElems,
1221+
(*it)->dataProper,
1222+
(*it)->kvMap ?
1223+
getJsonbOffset((*it)->container, (*it)->kvMap[(*it)->curIndex] + (*it)->nElems) :
1224+
(*it)->curValueOffset,
12081225
val);
12091226

12101227
JBE_ADVANCE_OFFSET((*it)->curDataOffset,
12111228
(*it)->children[(*it)->curIndex]);
1212-
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1213-
(*it)->children[(*it)->curIndex + (*it)->nElems]);
1229+
if (!(*it)->kvMap)
1230+
JBE_ADVANCE_OFFSET((*it)->curValueOffset,
1231+
(*it)->children[(*it)->curIndex + (*it)->nElems]);
12141232
(*it)->curIndex++;
12151233

12161234
/*
@@ -1263,24 +1281,34 @@ jsonbIteratorInitExt(JsonContainer *cont,
12631281
/* Array starts just after header */
12641282
it->children = container->children;
12651283

1266-
switch (container->header & (JBC_FARRAY | JBC_FOBJECT))
1284+
switch (container->header & JBC_TMASK)
12671285
{
1268-
case JBC_FARRAY:
1286+
case JBC_TSCALAR:
1287+
it->isScalar = true;
1288+
/* FALLTHROUGH */
1289+
case JBC_TARRAY:
12691290
it->dataProper =
12701291
(char *) it->children + it->nElems * sizeof(JEntry);
1271-
it->isScalar = (container->header & JBC_FSCALAR) != 0;
12721292
/* This is either a "raw scalar", or an array */
12731293
Assert(!it->isScalar || it->nElems == 1);
12741294

12751295
it->state = JBI_ARRAY_START;
12761296
break;
12771297

1278-
case JBC_FOBJECT:
1298+
case JBC_TOBJECT:
1299+
it->kvMap = NULL;
12791300
it->dataProper =
12801301
(char *) it->children + it->nElems * sizeof(JEntry) * 2;
12811302
it->state = JBI_OBJECT_START;
12821303
break;
12831304

1305+
case JBC_TOBJECT_SORTED:
1306+
it->kvMap = (uint32 *)
1307+
((char *) it->children + it->nElems * sizeof(JEntry) * 2);
1308+
it->dataProper = (char *) &it->kvMap[it->nElems];
1309+
it->state = JBI_OBJECT_START;
1310+
break;
1311+
12841312
default:
12851313
elog(ERROR, "unknown type of jsonb container");
12861314
}
@@ -1887,13 +1915,14 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
18871915
* Construct the header Jentry and store it in the beginning of the
18881916
* variable-length payload.
18891917
*/
1890-
header = nElems | JBC_FARRAY;
18911918
if (val->val.array.rawScalar)
18921919
{
18931920
Assert(nElems == 1);
18941921
Assert(level == 0);
1895-
header |= JBC_FSCALAR;
1922+
header = nElems | JBC_TSCALAR;
18961923
}
1924+
else
1925+
header = nElems | JBC_TARRAY;
18971926

18981927
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
18991928

@@ -1951,6 +1980,48 @@ convertJsonbArray(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int
19511980
*pheader = JENTRY_ISCONTAINER | totallen;
19521981
}
19531982

1983+
static int
1984+
int_cmp(const void *a, const void *b)
1985+
{
1986+
int x = *(const int *) a;
1987+
int y = *(const int *) b;
1988+
1989+
return x == y ? 0 : x > y ? 1 : -1;
1990+
}
1991+
1992+
static int
1993+
estimateJsonbValueSize(const JsonbValue *jbv)
1994+
{
1995+
int size;
1996+
1997+
switch (jbv->type)
1998+
{
1999+
case jbvNull:
2000+
case jbvBool:
2001+
return 0;
2002+
case jbvString:
2003+
return jbv->val.string.len;
2004+
case jbvNumeric:
2005+
return VARSIZE_ANY(jbv->val.numeric);
2006+
case jbvArray:
2007+
size = offsetof(JsonbContainerHeader, children[jbv->val.array.nElems]);
2008+
for (int i = 0; i < jbv->val.array.nElems; i++)
2009+
size += estimateJsonbValueSize(&jbv->val.array.elems[i]);
2010+
return size;
2011+
case jbvObject:
2012+
size = offsetof(JsonbContainerHeader, children[jbv->val.object.nPairs * 2]);
2013+
for (int i = 0; i < jbv->val.object.nPairs; i++)
2014+
{
2015+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].key);
2016+
size += estimateJsonbValueSize(&jbv->val.object.pairs[i].value);
2017+
}
2018+
return size;
2019+
default:
2020+
elog(ERROR, "invalid jsonb value type: %d", jbv->type);
2021+
return 0;
2022+
}
2023+
}
2024+
19542025
static void
19552026
convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, int level)
19562027
{
@@ -1960,9 +2031,39 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19602031
int totallen;
19612032
uint32 header;
19622033
int nPairs = val->val.object.nPairs;
2034+
int reserved_size;
2035+
bool sorted_values = JSONB_SORTED_VALUES && nPairs > 1;
2036+
struct
2037+
{
2038+
int size;
2039+
int32 index;
2040+
} *values = sorted_values ? palloc(sizeof(*values) * nPairs) : NULL;
19632041

19642042
Assert(nPairs >= 0);
19652043

2044+
if (sorted_values)
2045+
{
2046+
for (i = 0; i < nPairs; i++)
2047+
{
2048+
values[i].index = i;
2049+
values[i].size = estimateJsonbValueSize(&val->val.object.pairs[i].value);
2050+
}
2051+
2052+
qsort(values, nPairs, sizeof(*values), int_cmp);
2053+
2054+
/* check if keys were really moved */
2055+
sorted_values = false;
2056+
2057+
for (i = 0; i < nPairs; i++)
2058+
{
2059+
if (values[i].index != i)
2060+
{
2061+
sorted_values = true;
2062+
break;
2063+
}
2064+
}
2065+
}
2066+
19662067
/* Remember where in the buffer this object starts. */
19672068
base_offset = buffer->len;
19682069

@@ -1973,17 +2074,30 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
19732074
* Construct the header Jentry and store it in the beginning of the
19742075
* variable-length payload.
19752076
*/
1976-
header = nPairs | JBC_FOBJECT;
2077+
header = nPairs | (sorted_values ? JBC_TOBJECT_SORTED : JBC_TOBJECT);
19772078
appendToBuffer(buffer, (char *) &header, sizeof(uint32));
19782079

19792080
/* Reserve space for the JEntries of the keys and values. */
1980-
jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
2081+
reserved_size = sizeof(JEntry) * nPairs * 2;
2082+
if (sorted_values)
2083+
reserved_size += sizeof(int32) * nPairs;
2084+
2085+
jentry_offset = reserveFromBuffer(buffer, reserved_size);
2086+
2087+
/* Write key-value map */
2088+
if (sorted_values)
2089+
{
2090+
for (i = 0; i < nPairs; i++)
2091+
copyToBuffer(buffer, jentry_offset + sizeof(JEntry) * nPairs * 2 + values[i].index * sizeof(int32),
2092+
(void *) &i, sizeof(int32));
2093+
}
19812094

19822095
/*
19832096
* Iterate over the keys, then over the values, since that is the ordering
19842097
* we want in the on-disk representation.
19852098
*/
19862099
totallen = 0;
2100+
19872101
for (i = 0; i < nPairs; i++)
19882102
{
19892103
JsonbPair *pair = &val->val.object.pairs[i];
@@ -2019,9 +2133,11 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20192133
copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
20202134
jentry_offset += sizeof(JEntry);
20212135
}
2136+
20222137
for (i = 0; i < nPairs; i++)
20232138
{
2024-
JsonbPair *pair = &val->val.object.pairs[i];
2139+
int val_index = sorted_values ? values[i].index : i;
2140+
JsonbPair *pair = &val->val.object.pairs[val_index];
20252141
int len;
20262142
JEntry meta;
20272143

@@ -2055,6 +2171,9 @@ convertJsonbObject(StringInfo buffer, JEntry *pheader, const JsonbValue *val, in
20552171
jentry_offset += sizeof(JEntry);
20562172
}
20572173

2174+
if (values)
2175+
pfree(values);
2176+
20582177
/* Total data size is everything we've appended to buffer */
20592178
totallen = buffer->len - base_offset;
20602179

@@ -2280,17 +2399,36 @@ uniqueifyJsonbObject(JsonbValue *object, bool unique_keys, bool skip_nulls)
22802399
}
22812400
}
22822401

2402+
static void
2403+
jsonbInitContainerFromHeader(JsonContainerData *jc, JsonbContainerHeader *jbc)
2404+
{
2405+
jc->size = jbc->header & JBC_CMASK;
2406+
switch (jbc->header & JBC_TMASK)
2407+
{
2408+
case JBC_TOBJECT:
2409+
case JBC_TOBJECT_SORTED:
2410+
jc->type = jbvObject;
2411+
break;
2412+
case JBC_TARRAY:
2413+
jc->type = jbvArray;
2414+
break;
2415+
case JBC_TSCALAR:
2416+
jc->type = jbvArray | jbvScalar;
2417+
break;
2418+
default:
2419+
elog(ERROR, "invalid jsonb container type: %d",
2420+
jbc->header & JBC_TMASK);
2421+
}
2422+
}
2423+
22832424
static void
22842425
jsonbInitContainer(JsonContainerData *jc, JsonbContainerHeader *jbc, int len)
22852426
{
22862427
jc->ops = &jsonbContainerOps;
22872428
JsonContainerDataPtr(jc) = jbc;
22882429
jc->len = len;
22892430
jc->toasterid = InvalidOid;
2290-
jc->size = jbc->header & JBC_CMASK;
2291-
jc->type = jbc->header & JBC_FOBJECT ? jbvObject :
2292-
jbc->header & JBC_FSCALAR ? jbvArray | jbvScalar :
2293-
jbvArray;
2431+
jsonbInitContainerFromHeader(jc, jbc);
22942432
}
22952433

22962434
static void
@@ -2406,10 +2544,7 @@ jsonbzInitContainer(JsonContainerData *jc, CompressedJsonb *cjb, int len)
24062544

24072545
jc->ops = &jsonbzContainerOps;
24082546
jc->len = len;
2409-
jc->size = jbc->header & JBC_CMASK;
2410-
jc->type = jbc->header & JBC_FOBJECT ? jbvObject :
2411-
jbc->header & JBC_FSCALAR ? jbvArray | jbvScalar :
2412-
jbvArray;
2547+
jsonbInitContainerFromHeader(jc, jbc);
24132548
}
24142549

24152550
static JsonbContainer *
@@ -2479,12 +2614,15 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
24792614
JEntry *children = container->children;
24802615
int count = container->header & JBC_CMASK;
24812616
/* Since this is an object, account for *Pairs* of Jentrys */
2482-
char *base_addr = (char *) (children + count * 2);
2617+
bool sorted_values = (container->header & JBC_TMASK) == JBC_TOBJECT_SORTED;
2618+
char *base_addr = (char *) (children + count * 2) + (sorted_values ? sizeof(uint32) * count : 0);
2619+
uint32 *kvmap = sorted_values ? &container->children[count * 2] : NULL;
24832620
Size base_offset = base_addr - (char *) jb;
24842621
uint32 stopLow = 0,
24852622
stopHigh = count;
24862623

2487-
Assert(jb->root.header & JB_FOBJECT);
2624+
Assert((jb->root.header & JBC_TMASK) == JBC_TOBJECT ||
2625+
(jb->root.header & JBC_TMASK) == JBC_TOBJECT_SORTED);
24882626

24892627
/* Quick out if object/array is empty */
24902628
if (count <= 0)
@@ -2521,7 +2659,7 @@ findValueInCompressedJsonbObject(CompressedJsonb *cjb, const char *keystr, int k
25212659
if (difference == 0)
25222660
{
25232661
/* Found our key, return corresponding value */
2524-
int index = stopMiddle + count;
2662+
int index = (sorted_values ? kvmap[stopMiddle] : stopMiddle) + count;
25252663

25262664
if (!res)
25272665
res = palloc(sizeof(*res));

src/include/utils/jsonb_internals.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,15 @@ typedef struct JsonbContainerHeader
144144

145145
/* flags for the header-field in JsonbContainer */
146146
#define JBC_CMASK 0x0FFFFFFF /* mask for count field */
147-
#define JBC_FSCALAR 0x10000000 /* flag bits */
148-
#define JBC_FOBJECT 0x20000000
149-
#define JBC_FARRAY 0x40000000
147+
#define JBC_TMASK 0x70000000 /* mask for container type */
148+
/* container types */
149+
#define JBC_TOBJECT 0x20000000 /* object with key-value pairs
150+
* sorted by key length-alpha */
151+
#define JBC_TOBJECT_SORTED 0x30000000 /* object with keys sorted by
152+
* length-alpha; values sorted by
153+
* length */
154+
#define JBC_TARRAY 0x40000000 /* array */
155+
#define JBC_TSCALAR 0x50000000 /* scalar pseudo-array */
150156

151157
/* The top-level on-disk format for a jsonb datum. */
152158
typedef struct

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy