From fe68f11b61f53d98eab544fa29baf80d43ffeff9 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Thu, 20 Oct 2016 01:45:51 +0300 Subject: [PATCH 1/4] Fix unused variable warnings --- jsquery_op.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jsquery_op.c b/jsquery_op.c index 498884a..15eead5 100644 --- a/jsquery_op.c +++ b/jsquery_op.c @@ -449,7 +449,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) if (JsonbType(jb) == jbvScalar) { JsonbIterator *it; - int32 r; + int32 r PG_USED_FOR_ASSERTS_ONLY; JsonbValue v; it = JsonbIteratorInit(jb->val.binary.data); @@ -572,7 +572,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) if (JsonbType(jb) == jbvScalar) { JsonbIterator *it; - int32 r; + int32 r PG_USED_FOR_ASSERTS_ONLY; JsonbValue v; it = JsonbIteratorInit(jb->val.binary.data); From c624498385bb0708da4844d7fb1106df215b2b32 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Thu, 20 Oct 2016 01:45:23 +0300 Subject: [PATCH 2/4] Port to jsonbc --- jsquery.h | 1 + jsquery_op.c | 56 ++++++++++++++++++++++++++++++++-------------------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/jsquery.h b/jsquery.h index b2b4d0f..69bbedf 100644 --- a/jsquery.h +++ b/jsquery.h @@ -19,6 +19,7 @@ #include "fmgr.h" #include "utils/numeric.h" #include "utils/jsonb.h" +#include "utils/json_generic.h" typedef struct { diff --git a/jsquery_op.c b/jsquery_op.c index 15eead5..ee92dc2 100644 --- a/jsquery_op.c +++ b/jsquery_op.c @@ -43,7 +43,6 @@ compareNumeric(Numeric a, Numeric b) ); } -#define jbvScalar jbvBinary static int JsonbType(JsonbValue *jb) { @@ -52,15 +51,9 @@ JsonbType(JsonbValue *jb) if (jb->type == jbvBinary) { JsonbContainer *jbc = jb->val.binary.data; - - if (jbc->header & JB_FSCALAR) + type = jbc->type; + if (type == (jbvArray | jbvScalar)) type = jbvScalar; - else if (jbc->header & JB_FOBJECT) - type = jbvObject; - else if (jbc->header & JB_FARRAY) - type = jbvArray; - else - elog(ERROR, "Unknown container type: 0x%08x", jbc->header); } return type; @@ -174,18 +167,22 @@ checkArrayEquality(JsQueryItem *jsq, JsonbValue *jb) JsonbIterator *it; JsonbValue v; JsQueryItem elem; + int nelems; if (!(jsq->type == jqiArray && JsonbType(jb) == jbvArray)) return false; + nelems = JsonContainerSize(jb->val.binary.data); + if (nelems < 0) + nelems = JsonGetArraySize(jb->val.binary.data); + + if (nelems != jsq->array.nelems) + return false; it = JsonbIteratorInit(jb->val.binary.data); r = JsonbIteratorNext(&it, &v, true); Assert(r == WJB_BEGIN_ARRAY); - if (v.val.array.nElems != jsq->array.nelems) - return false; - while((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) { if (r != WJB_ELEM) @@ -338,7 +335,28 @@ executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb, JsQueryItem *jsqLeftArg) r = JsonbIteratorNext(&it, &v, true); Assert(r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT); - length = (r == WJB_BEGIN_ARRAY) ? v.val.array.nElems : v.val.object.nPairs; + if (r == WJB_BEGIN_ARRAY) + { + length = v.val.array.nElems; + + if (length < 0) + length = JsonGetArraySize(jb->val.binary.data); + } + else + { + length = v.val.object.nPairs; + + if (length < 0) + { + length = 0; + + while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) + { + if (r == WJB_KEY) + length++; + } + } + } v.type = jbvNumeric; v.val.numeric = DatumGetNumeric(DirectFunctionCall1(int4_numeric, Int32GetDatum(length))); @@ -609,16 +627,14 @@ jsquery_json_exec(PG_FUNCTION_ARGS) JsonbValue jbv; JsQueryItem jsq; - jbv.type = jbvBinary; - jbv.val.binary.data = &jb->root; - jbv.val.binary.len = VARSIZE_ANY_EXHDR(jb); + JsonValueInitBinary(&jbv, &jb->root); jsqInit(&jsq, jq); res = recursiveExecute(&jsq, &jbv, NULL); PG_FREE_IF_COPY(jq, 0); - PG_FREE_IF_COPY(jb, 1); + PG_FREE_IF_COPY_JSONB(jb, 1); PG_RETURN_BOOL(res); } @@ -633,15 +649,13 @@ json_jsquery_exec(PG_FUNCTION_ARGS) JsonbValue jbv; JsQueryItem jsq; - jbv.type = jbvBinary; - jbv.val.binary.data = &jb->root; - jbv.val.binary.len = VARSIZE_ANY_EXHDR(jb); + JsonValueInitBinary(&jbv, &jb->root); jsqInit(&jsq, jq); res = recursiveExecute(&jsq, &jbv, NULL); - PG_FREE_IF_COPY(jb, 0); + PG_FREE_IF_COPY_JSONB(jb, 0); PG_FREE_IF_COPY(jq, 1); PG_RETURN_BOOL(res); From d57758a602de7e6da4aa0838304396aa532390d9 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Tue, 1 Nov 2016 23:00:38 +0300 Subject: [PATCH 3/4] fixup! add array/object length as @# --- expected/jsquery.out | 24 ++++++ jsquery_op.c | 193 +++++++++++++++++++------------------------ sql/jsquery.sql | 4 + 3 files changed, 115 insertions(+), 106 deletions(-) diff --git a/expected/jsquery.out b/expected/jsquery.out index a550b9a..d86b0b4 100644 --- a/expected/jsquery.out +++ b/expected/jsquery.out @@ -1391,6 +1391,30 @@ select '{"a":[1,2]}' @@ '@# = 1'::jsquery; t (1 row) +select '[]' @@ '@# is numeric'::jsquery; + ?column? +---------- + t +(1 row) + +select '{}' @@ '@# is numeric'::jsquery; + ?column? +---------- + t +(1 row) + +select '0' @@ '@# is numeric'::jsquery; + ?column? +---------- + f +(1 row) + +select '0' @@ '@# = 1'::jsquery; + ?column? +---------- + f +(1 row) + --ALL select 'a.*: = 4'::jsquery; jsquery diff --git a/jsquery_op.c b/jsquery_op.c index ee92dc2..432e93e 100644 --- a/jsquery_op.c +++ b/jsquery_op.c @@ -29,7 +29,7 @@ #include "jsquery.h" -static bool recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg); +static bool recursiveExecute(JsQueryItem *jsq, JsonbValue *jb); static int compareNumeric(Numeric a, Numeric b) @@ -59,6 +59,43 @@ JsonbType(JsonbValue *jb) return type; } +static JsonbValue * +JsonbSize(JsonbValue *jb, JsonbValue *size) +{ + JsonbValue v; + JsonbIterator *it; + JsonbIteratorToken r; + int32 length; + int type = JsonbType(jb); + + if (type != jbvArray && type != jbvObject) + return NULL; + + Assert(jb->type == jbvBinary); + + it = JsonbIteratorInit(jb->val.binary.data); + r = JsonbIteratorNext(&it, &v, true); + Assert(r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT); + + if (r == WJB_BEGIN_ARRAY) + { + length = v.val.array.nElems; + if (length < 0) + length = JsonGetArraySize(jb->val.binary.data); + } + else + { + length = v.val.object.nPairs; + if (length < 0) + length = JsonGetObjectSize(jb->val.binary.data); + } + + size->type = jbvNumeric; + size->val.numeric = DatumGetNumeric(DirectFunctionCall1( + int4_numeric, Int32GetDatum(length))); + return size; +} + static bool recursiveAny(JsQueryItem *jsq, JsonbValue *jb) { @@ -81,7 +118,7 @@ recursiveAny(JsQueryItem *jsq, JsonbValue *jb) if (r == WJB_VALUE || r == WJB_ELEM) { - res = recursiveExecute(jsq, &v, NULL); + res = recursiveExecute(jsq, &v); if (res == false && v.type == jbvBinary) res = recursiveAny(jsq, &v); @@ -113,7 +150,7 @@ recursiveAll(JsQueryItem *jsq, JsonbValue *jb) if (r == WJB_VALUE || r == WJB_ELEM) { - if ((res = recursiveExecute(jsq, &v, NULL)) == true) + if ((res = recursiveExecute(jsq, &v)) == true) { if (v.type == jbvBinary) res = recursiveAll(jsq, &v); @@ -312,7 +349,7 @@ makeCompare(JsQueryItem *jsq, int32 op, JsonbValue *jb) } static bool -executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb, JsQueryItem *jsqLeftArg) +executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb) { bool res = false; /* @@ -322,100 +359,37 @@ executeExpr(JsQueryItem *jsq, int32 op, JsonbValue *jb, JsQueryItem *jsqLeftArg) Assert(jsq->type == jqiAny || jsq->type == jqiString || jsq->type == jqiNumeric || jsq->type == jqiNull || jsq->type == jqiBool || jsq->type == jqiArray); - if (jsqLeftArg && jsqLeftArg->type == jqiLength) + switch(op) { - if (JsonbType(jb) == jbvArray || JsonbType(jb) == jbvObject) - { - int32 length; - JsonbIterator *it; - JsonbValue v; - int r; - - it = JsonbIteratorInit(jb->val.binary.data); - r = JsonbIteratorNext(&it, &v, true); - Assert(r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT); - - if (r == WJB_BEGIN_ARRAY) - { - length = v.val.array.nElems; - - if (length < 0) - length = JsonGetArraySize(jb->val.binary.data); - } + case jqiEqual: + if (JsonbType(jb) == jbvArray && jsq->type == jqiArray) + res = checkArrayEquality(jsq, jb); else - { - length = v.val.object.nPairs; - - if (length < 0) - { - length = 0; - - while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE) - { - if (r == WJB_KEY) - length++; - } - } - } - - v.type = jbvNumeric; - v.val.numeric = DatumGetNumeric(DirectFunctionCall1(int4_numeric, Int32GetDatum(length))); - - switch(op) - { - case jqiEqual: - case jqiLess: - case jqiGreater: - case jqiLessOrEqual: - case jqiGreaterOrEqual: - res = makeCompare(jsq, op, &v); - break; - case jqiIn: - res = checkScalarIn(jsq, &v); - break; - case jqiOverlap: - case jqiContains: - case jqiContained: - break; - default: - elog(ERROR, "Unknown operation"); - } - } - } - else - { - switch(op) - { - case jqiEqual: - if (JsonbType(jb) == jbvArray && jsq->type == jqiArray) - res = checkArrayEquality(jsq, jb); - else - res = checkScalarEquality(jsq, jb); - break; - case jqiIn: - res = checkScalarIn(jsq, jb); - break; - case jqiOverlap: - case jqiContains: - case jqiContained: - res = executeArrayOp(jsq, op, jb); - break; - case jqiLess: - case jqiGreater: - case jqiLessOrEqual: - case jqiGreaterOrEqual: - res = makeCompare(jsq, op, jb); - break; - default: - elog(ERROR, "Unknown operation"); - } + res = checkScalarEquality(jsq, jb); + break; + case jqiIn: + res = checkScalarIn(jsq, jb); + break; + case jqiOverlap: + case jqiContains: + case jqiContained: + res = executeArrayOp(jsq, op, jb); + break; + case jqiLess: + case jqiGreater: + case jqiLessOrEqual: + case jqiGreaterOrEqual: + res = makeCompare(jsq, op, jb); + break; + default: + elog(ERROR, "Unknown operation"); } return res; } static bool -recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) +recursiveExecute(JsQueryItem *jsq, JsonbValue *jb) { JsQueryItem elem; bool res = false; @@ -425,25 +399,25 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) switch(jsq->type) { case jqiAnd: jsqGetLeftArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); if (res == true) { jsqGetRightArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); } break; case jqiOr: jsqGetLeftArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); if (res == false) { jsqGetRightArg(jsq, &elem); - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); } break; case jqiNot: jsqGetArg(jsq, &elem); - res = !recursiveExecute(&elem, jb, jsqLeftArg); + res = !recursiveExecute(&elem, jb); break; case jqiKey: if (JsonbType(jb) == jbvObject) { @@ -457,7 +431,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) if (v != NULL) { jsqGetNext(jsq, &elem); - res = recursiveExecute(&elem, v, NULL); + res = recursiveExecute(&elem, v); pfree(v); } } @@ -480,23 +454,23 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) r = JsonbIteratorNext(&it, &v, true); Assert(r == WJB_ELEM); - res = recursiveExecute(&elem, &v, jsqLeftArg); + res = recursiveExecute(&elem, &v); } else { - res = recursiveExecute(&elem, jb, jsqLeftArg); + res = recursiveExecute(&elem, jb); } break; case jqiAny: jsqGetNext(jsq, &elem); - if (recursiveExecute(&elem, jb, NULL)) + if (recursiveExecute(&elem, jb)) res = true; else if (jb->type == jbvBinary) res = recursiveAny(&elem, jb); break; case jqiAll: jsqGetNext(jsq, &elem); - if ((res = recursiveExecute(&elem, jb, NULL)) == true) + if ((res = recursiveExecute(&elem, jb)) == true) { if (jb->type == jbvBinary) res = recursiveAll(&elem, jb); @@ -520,7 +494,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) { if (r == WJB_ELEM) { - res = recursiveExecute(&elem, &v, NULL); + res = recursiveExecute(&elem, &v); if (jsq->type == jqiAnyArray) { @@ -554,7 +528,7 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) { if (r == WJB_VALUE) { - res = recursiveExecute(&elem, &v, NULL); + res = recursiveExecute(&elem, &v); if (jsq->type == jqiAnyKey) { @@ -580,12 +554,19 @@ recursiveExecute(JsQueryItem *jsq, JsonbValue *jb, JsQueryItem *jsqLeftArg) case jqiContained: case jqiOverlap: jsqGetArg(jsq, &elem); - res = executeExpr(&elem, jsq->type, jb, jsqLeftArg); + res = executeExpr(&elem, jsq->type, jb); break; case jqiLength: + { + JsonbValue size; jsqGetNext(jsq, &elem); - res = recursiveExecute(&elem, jb, jsq); + if (JsonbSize(jb, &size)) + { + res = recursiveExecute(&elem, &size); + pfree(size.val.numeric); + } break; + } case jqiIs: if (JsonbType(jb) == jbvScalar) { @@ -631,7 +612,7 @@ jsquery_json_exec(PG_FUNCTION_ARGS) jsqInit(&jsq, jq); - res = recursiveExecute(&jsq, &jbv, NULL); + res = recursiveExecute(&jsq, &jbv); PG_FREE_IF_COPY(jq, 0); PG_FREE_IF_COPY_JSONB(jb, 1); @@ -653,7 +634,7 @@ json_jsquery_exec(PG_FUNCTION_ARGS) jsqInit(&jsq, jq); - res = recursiveExecute(&jsq, &jbv, NULL); + res = recursiveExecute(&jsq, &jbv); PG_FREE_IF_COPY_JSONB(jb, 0); PG_FREE_IF_COPY(jq, 1); diff --git a/sql/jsquery.sql b/sql/jsquery.sql index 309b790..15889d3 100644 --- a/sql/jsquery.sql +++ b/sql/jsquery.sql @@ -283,6 +283,10 @@ select '{"a":[1,2]}' @@ '%.@# in (2, 4)'::jsquery; select '{"a":[1,2]}' @@ '*.@# in (2, 4)'::jsquery; select '{"a":[1,2]}' @@ '*.@# ($ = 4 or $ = 2)'::jsquery; select '{"a":[1,2]}' @@ '@# = 1'::jsquery; +select '[]' @@ '@# is numeric'::jsquery; +select '{}' @@ '@# is numeric'::jsquery; +select '0' @@ '@# is numeric'::jsquery; +select '0' @@ '@# = 1'::jsquery; --ALL select 'a.*: = 4'::jsquery; From c0da8a546d163d42775291b3523cfd39a2634580 Mon Sep 17 00:00:00 2001 From: Nikita Glukhov Date: Sat, 12 Nov 2016 22:30:02 +0300 Subject: [PATCH 4/4] Add selectivity estimation --- Makefile | 5 +- expected/jsquery_stats.out | 425 +++++++++++++++++++++++++++++++ jsquery--1.0.sql | 9 +- jsquery_selfuncs.c | 495 +++++++++++++++++++++++++++++++++++++ sql/jsquery_stats.sql | 175 +++++++++++++ 5 files changed, 1105 insertions(+), 4 deletions(-) create mode 100644 expected/jsquery_stats.out create mode 100644 jsquery_selfuncs.c create mode 100644 sql/jsquery_stats.sql diff --git a/Makefile b/Makefile index 2559ad9..9961607 100644 --- a/Makefile +++ b/Makefile @@ -2,12 +2,13 @@ MODULE_big = jsquery OBJS = jsonb_gin_ops.o jsquery_constr.o jsquery_extract.o \ - jsquery_gram.o jsquery_io.o jsquery_op.o jsquery_support.o + jsquery_gram.o jsquery_io.o jsquery_op.o jsquery_selfuncs.o \ + jsquery_support.o EXTENSION = jsquery DATA = jsquery--1.0.sql -REGRESS = jsquery +REGRESS = jsquery jsquery_stats # We need a UTF8 database ENCODING = UTF8 diff --git a/expected/jsquery_stats.out b/expected/jsquery_stats.out new file mode 100644 index 0000000..be1f0ff --- /dev/null +++ b/expected/jsquery_stats.out @@ -0,0 +1,425 @@ +DROP FUNCTION IF EXISTS explain_jsonb(text); + +CREATE OR REPLACE FUNCTION explain_jsonb(sql_query text) +RETURNS TABLE(explain_line json) AS +$$ +BEGIN + RETURN QUERY EXECUTE 'EXPLAIN (ANALYZE, FORMAT json) ' || sql_query; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_plan_and_actual_rows(sql_query text) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT + (plan->>'Plan Rows')::integer plan, + (plan->>'Actual Rows')::integer actual + FROM ( + SELECT explain_jsonb(sql_query) #> '{0,Plan,Plans,0}' + ) p(plan) +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate(sql_query text, accuracy real) +RETURNS boolean AS +$$ + SELECT plan BETWEEN actual / (1 + accuracy) AND (actual + 1) * (1 + accuracy) + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate2(sql_query text, accuracy real) +RETURNS TABLE(min integer, max integer) AS +$$ + SELECT (actual * (1 - accuracy))::integer, ((actual + 1) * (1 + accuracy))::integer + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +DROP TABLE IF EXISTS jsonb_stats_test; +CREATE TABLE jsonb_stats_test(js jsonb); + +INSERT INTO jsonb_stats_test SELECT NULL FROM generate_series(1, 10000); + +INSERT INTO jsonb_stats_test SELECT 'null' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT 'true' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT 'false' FROM generate_series(1, 5000); + +INSERT INTO jsonb_stats_test SELECT '12345' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT (1000 * (i % 10))::text::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT i::text::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '"foo"' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT format('"bar%s"', i % 10)::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT format('"baz%s"', i)::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '{}' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'bar') FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'baz' || (i % 10)) FROM generate_series(1, 3000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', i % 10) FROM generate_series(1, 2000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('"foo \"bar"', i % 10) FROM generate_series(1, 2000) i; + +INSERT INTO jsonb_stats_test SELECT '[]' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT '["foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[{"key": "foo"}]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, "foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, 12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[null, ["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, {"key": "foo"}]' FROM generate_series(1, 2000); + +-- Build random variable-length integer arrays +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array()) +FROM generate_series(1, 1000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int)) +FROM generate_series(1, 4000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 3000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 2000); + + +ANALYZE jsonb_stats_test; + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate(sql_condition text, accuracy real) +RETURNS boolean AS +$$ + SELECT check_estimate('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition, accuracy) +$$ LANGUAGE sql; + +DROP FUNCTION IF EXISTS check_jsonb_stats_test_estimate2(text, real); + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate2(sql_condition text, accuracy real) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT get_plan_and_actual_rows('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition) +$$ LANGUAGE sql; + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '* = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate2($$js @@ '% = *'$$, 0.01); + check_jsonb_stats_test_estimate2 +---------------------------------- + (16993,18000) +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = null'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = true'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = false'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS BOOLEAN'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS NUMERIC'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS STRING'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS OBJECT'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS ARRAY'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = null'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '*.bad_key = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '%.bad_key = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.* = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.# = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.% = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key1.bad_key2 = *'$$, 0.01); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'foo = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS OBJECT'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS OBJECT'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS ARRAY'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS ARRAY'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS STRING'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS STRING'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS NUMERIC'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS NUMERIC'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 3'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 3'$$, 0.2); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 100'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 100'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 0'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 1'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 2'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#.# = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#.% = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ '#.* = "foo"'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array = *'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 0'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 1'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 2'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 3'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 4'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# < 3'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array @> [3]'$$, 0.1); + check_jsonb_stats_test_estimate +--------------------------------- + t +(1 row) + +SELECT check_jsonb_stats_test_estimate2($$js @@ 'array @> [3,8]'$$, 1); + check_jsonb_stats_test_estimate2 +---------------------------------- + (31,169) +(1 row) + + diff --git a/jsquery--1.0.sql b/jsquery--1.0.sql index 3bf1d9b..f009cd8 100644 --- a/jsquery--1.0.sql +++ b/jsquery--1.0.sql @@ -30,12 +30,17 @@ CREATE FUNCTION json_jsquery_exec(jsonb, jsquery) AS 'MODULE_PATHNAME' LANGUAGE C STRICT IMMUTABLE; +CREATE FUNCTION jsquery_sel(internal, oid, internal, integer) + RETURNS float8 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT; + CREATE OPERATOR @@ ( LEFTARG = jsquery, RIGHTARG = jsonb, PROCEDURE = jsquery_json_exec, COMMUTATOR = '@@', - RESTRICT = contsel, + RESTRICT = jsquery_sel, JOIN = contjoinsel ); @@ -44,7 +49,7 @@ CREATE OPERATOR @@ ( RIGHTARG = jsquery, PROCEDURE = json_jsquery_exec, COMMUTATOR = '@@', - RESTRICT = contsel, + RESTRICT = jsquery_sel, JOIN = contjoinsel ); diff --git a/jsquery_selfuncs.c b/jsquery_selfuncs.c new file mode 100644 index 0000000..3eecb57 --- /dev/null +++ b/jsquery_selfuncs.c @@ -0,0 +1,495 @@ +/*------------------------------------------------------------------------- + * + * jsquery_selfuncs.c + * Functions for selectivity estimation of jsquery operators + * + * Copyright (c) 2016, PostgreSQL Global Development Group + * Author: Nikita Glukhov + * + * IDENTIFICATION + * contrib/jsquery/jsquery_selfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "miscadmin.h" +#include "catalog/pg_operator.h" +#include "utils/json_selfuncs.h" +#include "utils/selfuncs.h" + +#include "jsquery.h" + +#include + +/* Default selectivity constant for "@@" operator */ +#define DEFAULT_JSQUERY_SEL 0.005 + +static Selectivity jsqSelectivity(JsonPathStats stats, + JsQueryItem *jsq, + JsQueryItem *jsqLeftArg); + +static JsonbValue * +JsQueryItemToJsonbValue(JsQueryItem *jsq, JsonbValue *jbv) +{ + switch (jsq->type) + { + case jqiNull: + jbv->type = jbvNull; + break; + + case jqiString: + jbv->type = jbvString; + jbv->val.string.val = jsqGetString(jsq, &jbv->val.string.len); + break; + + case jqiBool: + jbv->type = jbvBool; + jbv->val.boolean = jsqGetBool(jsq); + break; + + case jqiNumeric: + jbv->type = jbvNumeric; + jbv->val.numeric = jsqGetNumeric(jsq); + break; + + default: + elog(ERROR, "Invalid jsquery scalar type %d", jsq->type); + break; + } + + return jbv; +} + +static Selectivity +jsqSelectivityScalarCmp(JsonPathStats stats, JsQueryItem *jsq, int32 op) +{ + JsonbValue jbv; + Oid opr; + + if (jsq->type != jqiNumeric) /* TODO other types support */ + return 0.0; + + if (jsq->type == jqiAny) + return jsonPathStatsGetFreq(stats, DEFAULT_JSQUERY_SEL); + + JsQueryItemToJsonbValue(jsq, &jbv); + + switch (op) + { + case jqiEqual: + opr = JsonbEqOperator; + break; + case jqiLess: + opr = JsonbLtOperator; + break; + case jqiGreater: + opr = JsonbGtOperator; + break; + case jqiLessOrEqual: + opr = JsonbLeOperator; + break; + case jqiGreaterOrEqual: + opr = JsonbGeOperator; + break; + default: + elog(ERROR, "Unknown jsquery operation %d", op); + } + + return jsonSelectivity(stats, JsonbGetDatum(JsonbValueToJsonb(&jbv)), opr); +} + +static Selectivity +jsqSelectivityScalarEq(JsonPathStats stats, JsQueryItem *jsq) +{ + JsonbValue jbv; + Datum jsonb; + + if (jsq->type == jqiAny) + return jsonPathStatsGetFreq(stats, DEFAULT_JSQUERY_SEL); + + jsonb = JsonbGetDatum(JsonbValueToJsonb( + JsQueryItemToJsonbValue(jsq, &jbv))); + + return jsonSelectivity(stats, jsonb, JsonbEqOperator); +} + +static Selectivity +jsqSelectivityArrayEq(JsonPathStats stats, JsQueryItem *jsq) +{ + JsonbParseState *ps = NULL; + JsQueryItem elem; + JsonbValue *jbv; + + if (jsq->type != jqiArray) + return 0.0; + + pushJsonbValue(&ps, WJB_BEGIN_ARRAY, NULL); + + for (jsqIterateInit(jsq); jsqIterateArray(jsq, &elem);) + { + JsonbValue v; + + if (elem.type == jqiAny) + continue; + + pushJsonbValue(&ps, WJB_ELEM, JsQueryItemToJsonbValue(&elem, &v)); + } + + jbv = pushJsonbValue(&ps, WJB_END_ARRAY, NULL); + + return jsonSelectivity(stats, JsonbGetDatum(JsonbValueToJsonb(jbv)), + JsonbEqOperator); +} + +static Selectivity +jsqSelectivityScalarIn(JsonPathStats stats, JsQueryItem *jsq) +{ + JsQueryItem elem; + Selectivity res = 1.0; + + if (jsq->type != jqiArray) + return 0.0; + + for (jsqIterateInit(jsq); res > 0.0 && jsqIterateArray(jsq, &elem);) + res *= 1.0 - jsqSelectivityScalarEq(stats, &elem); + + return 1.0 - res; +} + +static Selectivity +jsqSelectivityArrayAnyAll(JsonPathStats stats, JsQueryItem *jsq, bool any) +{ + JsonPathStats arrstats = jsonPathStatsGetSubpath(stats, NULL, 0); + float4 arrfreq = jsonPathStatsGetTypeFreq(stats, jbvArray, 0.001); + float4 arrsize; + float4 arrsize0freq; + Selectivity res; + + if (!arrstats || arrfreq <= 0.0) + return 0.0; + + arrsize = jsonPathStatsGetAvgArraySize(arrstats); + + if (arrsize == 0.0) + return any ? 0.0 : arrfreq; + + res = jsqSelectivity(arrstats, jsq, NULL); + + res /= arrfreq; + CLAMP_PROBABILITY(res); + + if (any) + res = 1 - res; + + /* TODO use other array size stats */ + arrsize0freq = jsonPathStatsGetArrayIndexSelectivity(arrstats, 0) / arrfreq; + + res = pow(res, arrsize / arrsize0freq); + + if (any) + res = 1 - res; + + res *= arrsize0freq; + + if (!any) + res += 1.0 - arrsize0freq; + + return arrfreq * res; +} + + +static Selectivity +jsqSelectivityKeyAnyAll(JsonPathStats stats, JsQueryItem *jsq, bool any) +{ +/* float4 objfreq = jsonPathStatsGetTypeFreq(stats, jbvObject, 0.0); */ + JsonPathStats keystats = NULL; + Selectivity res = 1.0; + + while (res > 0.0 && jsonPathStatsGetNextKeyStats(stats, &keystats, true)) + { + Selectivity sel = jsqSelectivity(keystats, jsq, NULL); + res *= any ? (1.0 - sel) : sel; + } + + if (any) + res = 1.0 - res; + + return res; +} + +static Selectivity +jsqSelectivityAnyAll(JsonPathStats stats, JsQueryItem *jsq, bool any) +{ + JsQueryItem elem; + Selectivity sel; + Selectivity res; + Selectivity freq = jsonPathStatsGetFreq(stats, 0.0); + + if (freq <= 0.0) + return 0.0; + + jsqGetNext(jsq, &elem); + + sel = jsqSelectivity(stats, &elem, NULL) / freq; + CLAMP_PROBABILITY(sel); + //res = any ? (1.0 - sel) : sel; + res = sel; + + if (res > 0.0 && jsonPathStatsGetTypeFreq(stats, jbvArray, 0.0) > 0.0) + { + sel = jsqSelectivityArrayAnyAll(stats, jsq, any) / freq; + CLAMP_PROBABILITY(sel); + //res *= any ? (1.0 - sel) : sel; + res = any ? res + sel : res * sel; + } + + if (res > 0.0 && jsonPathStatsGetTypeFreq(stats, jbvObject, 0.0) > 0.0) + { + sel = jsqSelectivityKeyAnyAll(stats, jsq, any) / freq; + CLAMP_PROBABILITY(sel); + //res *= any ? (1.0 - sel) : sel; + res = any ? res + sel : res * sel; + } + + //if (any) + // res = 1.0 - res; + CLAMP_PROBABILITY(res); + + return res * freq; +} + +static Selectivity +jsqSelectivityArrayOp(JsonPathStats stats, JsQueryItem *op, JsQueryItem *arg) +{ + JsQueryItem elem; + + if (arg->type != jqiArray) + return 0.0; + + if (op->type == jqiContains || op->type == jqiOverlap) + { + JsQueryItem eq; + Selectivity res = 1.0; + bool contains = op->type == jqiContains; + + eq.type = jqiEqual; + eq.base = arg->base; + + for (jsqIterateInit(arg); res > 0.0 && jsqIterateArray(arg, &elem);) + { + Selectivity sel; + eq.arg = arg->array.arrayPtr[arg->array.current - 1]; /* FIXME */ + sel = jsqSelectivityArrayAnyAll(stats, &eq, true); + res *= contains ? sel : (1.0 - sel); + } + + return contains ? res : (1.0 - res); + } + else + { + JsQueryItem in; + + in.type = jqiIn; + in.base = op->base; + in.arg = op->arg; + + return jsqSelectivityArrayAnyAll(stats, &in, false); + } +} + +static Selectivity +jsqSelectivityExpr(JsonPathStats stats, JsQueryItem *expr, + JsQueryItem *jsqLeftArg) +{ + JsQueryItem arg; + + jsqGetArg(expr, &arg); + + Assert(!jsqGetNext(&arg, NULL)); + Assert(arg.type == jqiAny || arg.type == jqiString || + arg.type == jqiNumeric || arg.type == jqiNull || + arg.type == jqiBool || arg.type == jqiArray); + + switch (expr->type) + { + case jqiEqual: + return arg.type == jqiArray ? jsqSelectivityArrayEq(stats, &arg) + : jsqSelectivityScalarEq(stats, &arg); + + case jqiLess: + case jqiGreater: + case jqiLessOrEqual: + case jqiGreaterOrEqual: + return jsqSelectivityScalarCmp(stats, &arg, expr->type); + + case jqiIn: + return jsqSelectivityScalarIn(stats, &arg); + + case jqiOverlap: + case jqiContains: + case jqiContained: + return jsqSelectivityArrayOp(stats, expr, &arg); + + default: + elog(ERROR, "Unknown operation"); + return DEFAULT_JSQUERY_SEL; + } +} + +static Selectivity +jsqSelectivity(JsonPathStats stats, JsQueryItem *jsq, JsQueryItem *jsqLeftArg) +{ + JsQueryItem elem; + Selectivity res = DEFAULT_JSQUERY_SEL; + + check_stack_depth(); + + switch (jsq->type) + { + case jqiAnd: + jsqGetLeftArg(jsq, &elem); + res = jsqSelectivity(stats, &elem, jsqLeftArg); + if (res > 0.0) + { + jsqGetRightArg(jsq, &elem); + res *= jsqSelectivity(stats, &elem, jsqLeftArg); + } + break; + + case jqiOr: + jsqGetLeftArg(jsq, &elem); + res = jsqSelectivity(stats, &elem, jsqLeftArg); + if (res < 1.0) + { + jsqGetRightArg(jsq, &elem); + res = 1.0 - (1.0 - res) * + (1.0 - jsqSelectivity(stats, &elem, jsqLeftArg)); + } + break; + + case jqiNot: + jsqGetArg(jsq, &elem); + res = 1.0 - jsqSelectivity(stats, &elem, jsqLeftArg); + break; + + case jqiKey: + { + int keylen; + char *key = jsqGetString(jsq, &keylen); + JsonPathStats keystats = jsonPathStatsGetSubpath(stats, + key, keylen); + + if (keystats) + { + jsqGetNext(jsq, &elem); + res = jsqSelectivity(keystats, &elem, NULL); + } + else + res = 0.0; + + break; + } + + case jqiCurrent: + jsqGetNext(jsq, &elem); + res = jsqSelectivity(stats, &elem, jsqLeftArg); + break; + + case jqiAnyKey: + case jqiAllKey: + jsqGetNext(jsq, &elem); + res = jsqSelectivityKeyAnyAll(stats, &elem, jsq->type == jqiAnyKey); + break; + + case jqiAnyArray: + case jqiAllArray: + jsqGetNext(jsq, &elem); + res = jsqSelectivityArrayAnyAll(stats, &elem, + jsq->type == jqiAnyArray); + break; + + case jqiAny: + case jqiAll: + res = jsqSelectivityAnyAll(stats, jsq, jsq->type == jqiAny); + break; + + case jqiEqual: + case jqiIn: + case jqiLess: + case jqiGreater: + case jqiLessOrEqual: + case jqiGreaterOrEqual: + case jqiContains: + case jqiContained: + case jqiOverlap: + res = jsqSelectivityExpr(stats, jsq, jsqLeftArg); + break; + + case jqiLength: + jsqGetNext(jsq, &elem); + stats = jsonPathStatsGetLengthStats(stats); + res = stats ? jsqSelectivity(stats, &elem, jsq) : 0.0; + break; + + case jqiIs: + res = jsonPathStatsGetTypeFreq(stats, jsqGetIsType(jsq), 0.0); + break; + + default: + elog(ERROR,"Wrong state: %d", jsq->type); + } + + CLAMP_PROBABILITY(res); + + return res; +} + +PG_FUNCTION_INFO_V1(jsquery_sel); + +Datum +jsquery_sel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); +/* Oid operator = PG_GETARG_OID(1); */ + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + Selectivity sel = DEFAULT_JSQUERY_SEL; + VariableStatData vardata; + JsonStatData jsdata; + JsonPathStats pstats; + Node *other; + bool varonleft; + JsQuery *jq; + JsQueryItem jsq; + + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8((float8) sel); + + if (!IsA(other, Const) || !varonleft /* FIXME l/r operator */) + goto out; + + if (((Const *) other)->constisnull) + { + sel = 0.0; + goto out; + } + + jq = DatumGetJsQueryP(((Const *) other)->constvalue); + jsqInit(&jsq, jq); + + if (!jsonStatsInit(&jsdata, &vardata)) + goto out; + + pstats = jsonStatsGetPathStatsStr(&jsdata, "$", 1); + + sel = pstats ? jsqSelectivity(pstats, &jsq, NULL) : 0.0; + +out: + ReleaseVariableStats(vardata); + + CLAMP_PROBABILITY(sel); + + PG_RETURN_FLOAT8((float8) sel); +} diff --git a/sql/jsquery_stats.sql b/sql/jsquery_stats.sql new file mode 100644 index 0000000..9140d2b --- /dev/null +++ b/sql/jsquery_stats.sql @@ -0,0 +1,175 @@ +DROP FUNCTION IF EXISTS explain_jsonb(text); + +CREATE OR REPLACE FUNCTION explain_jsonb(sql_query text) +RETURNS TABLE(explain_line json) AS +$$ +BEGIN + RETURN QUERY EXECUTE 'EXPLAIN (ANALYZE, FORMAT json) ' || sql_query; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION get_plan_and_actual_rows(sql_query text) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT + (plan->>'Plan Rows')::integer plan, + (plan->>'Actual Rows')::integer actual + FROM ( + SELECT explain_jsonb(sql_query) #> '{0,Plan,Plans,0}' + ) p(plan) +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate(sql_query text, accuracy real) +RETURNS boolean AS +$$ + SELECT plan BETWEEN actual / (1 + accuracy) AND (actual + 1) * (1 + accuracy) + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION check_estimate2(sql_query text, accuracy real) +RETURNS TABLE(min integer, max integer) AS +$$ + SELECT (actual * (1 - accuracy))::integer, ((actual + 1) * (1 + accuracy))::integer + FROM (SELECT * FROM get_plan_and_actual_rows(sql_query)) x +$$ LANGUAGE sql; + +DROP TABLE IF EXISTS jsonb_stats_test; +CREATE TABLE jsonb_stats_test(js jsonb); + +INSERT INTO jsonb_stats_test SELECT NULL FROM generate_series(1, 10000); + +INSERT INTO jsonb_stats_test SELECT 'null' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT 'true' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT 'false' FROM generate_series(1, 5000); + +INSERT INTO jsonb_stats_test SELECT '12345' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT (1000 * (i % 10))::text::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT i::text::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '"foo"' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT format('"bar%s"', i % 10)::jsonb FROM generate_series(1, 4000) i; +INSERT INTO jsonb_stats_test SELECT format('"baz%s"', i)::jsonb FROM generate_series(1, 5000) i; + +INSERT INTO jsonb_stats_test SELECT '{}' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'bar') FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', 'baz' || (i % 10)) FROM generate_series(1, 3000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('foo', i % 10) FROM generate_series(1, 2000) i; +INSERT INTO jsonb_stats_test SELECT jsonb_build_object('"foo \"bar"', i % 10) FROM generate_series(1, 2000) i; + +INSERT INTO jsonb_stats_test SELECT '[]' FROM generate_series(1, 1000); +INSERT INTO jsonb_stats_test SELECT '["foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[{"key": "foo"}]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, "foo"]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, 12345]' FROM generate_series(1, 3000); +INSERT INTO jsonb_stats_test SELECT '[null, ["foo"]]' FROM generate_series(1, 2000); +INSERT INTO jsonb_stats_test SELECT '[null, {"key": "foo"}]' FROM generate_series(1, 2000); + +-- Build random variable-length integer arrays +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array()) +FROM generate_series(1, 1000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int)) +FROM generate_series(1, 4000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 3000); + +INSERT INTO jsonb_stats_test +SELECT jsonb_build_object('array', + jsonb_build_array( + floor(random() * 10)::int, + floor(random() * 10)::int, + floor(random() * 10)::int)) +FROM generate_series(1, 2000); + + +ANALYZE jsonb_stats_test; + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate(sql_condition text, accuracy real) +RETURNS boolean AS +$$ + SELECT check_estimate('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition, accuracy) +$$ LANGUAGE sql; + +DROP FUNCTION IF EXISTS check_jsonb_stats_test_estimate2(text, real); + +CREATE OR REPLACE FUNCTION check_jsonb_stats_test_estimate2(sql_condition text, accuracy real) +RETURNS TABLE(plan integer, actual integer) AS +$$ + SELECT get_plan_and_actual_rows('SELECT count(*) FROM jsonb_stats_test WHERE ' || sql_condition) +$$ LANGUAGE sql; + + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ '* = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate2($$js @@ '% = *'$$, 0.01); + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = null'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = true'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ = false'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS BOOLEAN'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS NUMERIC'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS STRING'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS OBJECT'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '$ IS ARRAY'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key = null'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ '*.bad_key = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ '%.bad_key = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.* = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.# = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key.% = *'$$, 0.01); +SELECT check_jsonb_stats_test_estimate($$js @@ 'bad_key1.bad_key2 = *'$$, 0.01); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'foo = *'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS OBJECT'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS OBJECT'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS ARRAY'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS ARRAY'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS STRING'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS STRING'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# IS NUMERIC'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: IS NUMERIC'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = *'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = *'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 3'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 3'$$, 0.2); +SELECT check_jsonb_stats_test_estimate($$js @@ '# = 100'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#: = 100'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 0'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 1'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '@# = 2'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ '# = "foo"'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#.# = "foo"'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#.% = "foo"'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ '#.* = "foo"'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array = *'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 0'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 1'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 2'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 3'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# = 4'$$, 0.1); +SELECT check_jsonb_stats_test_estimate($$js @@ 'array.@# < 3'$$, 0.1); + +SELECT check_jsonb_stats_test_estimate($$js @@ 'array @> [3]'$$, 0.1); +SELECT check_jsonb_stats_test_estimate2($$js @@ 'array @> [3,8]'$$, 1); + pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy