Skip to content

Commit e83daa7

Browse files
committed
Use multi-variate MCV lists to estimate ScalarArrayOpExpr
Commit 8f321bd added support for estimating ScalarArrayOpExpr clauses (IN/ANY) clauses using functional dependencies. There's no good reason not to support estimation of these clauses using multi-variate MCV lists too, so this commits implements that. That makes the behavior consistent and MCV lists can estimate all variants (ANY/ALL, inequalities, ...). Author: Tomas Vondra Review: Dean Rasheed Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc
1 parent 8f321bd commit e83daa7

File tree

5 files changed

+252
-9
lines changed

5 files changed

+252
-9
lines changed

src/backend/statistics/extended_stats.c

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,63 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
994994
return false;
995995

996996
/* Check if the expression the right shape (one Var, one Const) */
997-
if (!examine_opclause_expression(expr, &var, NULL, NULL))
997+
if (!examine_clause_args(expr->args, &var, NULL, NULL))
998+
return false;
999+
1000+
/*
1001+
* If it's not one of the supported operators ("=", "<", ">", etc.),
1002+
* just ignore the clause, as it's not compatible with MCV lists.
1003+
*
1004+
* This uses the function for estimating selectivity, not the operator
1005+
* directly (a bit awkward, but well ...).
1006+
*/
1007+
switch (get_oprrest(expr->opno))
1008+
{
1009+
case F_EQSEL:
1010+
case F_NEQSEL:
1011+
case F_SCALARLTSEL:
1012+
case F_SCALARLESEL:
1013+
case F_SCALARGTSEL:
1014+
case F_SCALARGESEL:
1015+
/* supported, will continue with inspection of the Var */
1016+
break;
1017+
1018+
default:
1019+
/* other estimators are considered unknown/unsupported */
1020+
return false;
1021+
}
1022+
1023+
/*
1024+
* If there are any securityQuals on the RTE from security barrier
1025+
* views or RLS policies, then the user may not have access to all the
1026+
* table's data, and we must check that the operator is leak-proof.
1027+
*
1028+
* If the operator is leaky, then we must ignore this clause for the
1029+
* purposes of estimating with MCV lists, otherwise the operator might
1030+
* reveal values from the MCV list that the user doesn't have
1031+
* permission to see.
1032+
*/
1033+
if (rte->securityQuals != NIL &&
1034+
!get_func_leakproof(get_opcode(expr->opno)))
1035+
return false;
1036+
1037+
return statext_is_compatible_clause_internal(root, (Node *) var,
1038+
relid, attnums);
1039+
}
1040+
1041+
/* Var IN Array */
1042+
if (IsA(clause, ScalarArrayOpExpr))
1043+
{
1044+
RangeTblEntry *rte = root->simple_rte_array[relid];
1045+
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1046+
Var *var;
1047+
1048+
/* Only expressions with two arguments are considered compatible. */
1049+
if (list_length(expr->args) != 2)
1050+
return false;
1051+
1052+
/* Check if the expression the right shape (one Var, one Const) */
1053+
if (!examine_clause_args(expr->args, &var, NULL, NULL))
9981054
return false;
9991055

10001056
/*
@@ -1396,7 +1452,7 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
13961452
* on which side of the operator we found the Var node.
13971453
*/
13981454
bool
1399-
examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
1455+
examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
14001456
{
14011457
Var *var;
14021458
Const *cst;
@@ -1405,10 +1461,10 @@ examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonl
14051461
*rightop;
14061462

14071463
/* enforced by statext_is_compatible_clause_internal */
1408-
Assert(list_length(expr->args) == 2);
1464+
Assert(list_length(args) == 2);
14091465

1410-
leftop = linitial(expr->args);
1411-
rightop = lsecond(expr->args);
1466+
leftop = linitial(args);
1467+
rightop = lsecond(args);
14121468

14131469
/* strip RelabelType from either side of the expression */
14141470
if (IsA(leftop, RelabelType))

src/backend/statistics/mcv.c

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,15 +1579,15 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
15791579
OpExpr *expr = (OpExpr *) clause;
15801580
FmgrInfo opproc;
15811581

1582-
/* valid only after examine_opclause_expression returns true */
1582+
/* valid only after examine_clause_args returns true */
15831583
Var *var;
15841584
Const *cst;
15851585
bool varonleft;
15861586

15871587
fmgr_info(get_opcode(expr->opno), &opproc);
15881588

15891589
/* extract the var and const from the expression */
1590-
if (examine_opclause_expression(expr, &var, &cst, &varonleft))
1590+
if (examine_clause_args(expr->args, &var, &cst, &varonleft))
15911591
{
15921592
int idx;
15931593

@@ -1652,6 +1652,113 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
16521652
}
16531653
}
16541654
}
1655+
else if (IsA(clause, ScalarArrayOpExpr))
1656+
{
1657+
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1658+
FmgrInfo opproc;
1659+
1660+
/* valid only after examine_clause_args returns true */
1661+
Var *var;
1662+
Const *cst;
1663+
bool varonleft;
1664+
1665+
fmgr_info(get_opcode(expr->opno), &opproc);
1666+
1667+
/* extract the var and const from the expression */
1668+
if (examine_clause_args(expr->args, &var, &cst, &varonleft))
1669+
{
1670+
int idx;
1671+
1672+
ArrayType *arrayval;
1673+
int16 elmlen;
1674+
bool elmbyval;
1675+
char elmalign;
1676+
int num_elems;
1677+
Datum *elem_values;
1678+
bool *elem_nulls;
1679+
1680+
/* ScalarArrayOpExpr has the Var always on the left */
1681+
Assert(varonleft);
1682+
1683+
if (!cst->constisnull)
1684+
{
1685+
arrayval = DatumGetArrayTypeP(cst->constvalue);
1686+
get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1687+
&elmlen, &elmbyval, &elmalign);
1688+
deconstruct_array(arrayval,
1689+
ARR_ELEMTYPE(arrayval),
1690+
elmlen, elmbyval, elmalign,
1691+
&elem_values, &elem_nulls, &num_elems);
1692+
}
1693+
1694+
/* match the attribute to a dimension of the statistic */
1695+
idx = bms_member_index(keys, var->varattno);
1696+
1697+
/*
1698+
* Walk through the MCV items and evaluate the current clause.
1699+
* We can skip items that were already ruled out, and
1700+
* terminate if there are no remaining MCV items that might
1701+
* possibly match.
1702+
*/
1703+
for (i = 0; i < mcvlist->nitems; i++)
1704+
{
1705+
int j;
1706+
bool match = (expr->useOr ? false : true);
1707+
MCVItem *item = &mcvlist->items[i];
1708+
1709+
/*
1710+
* When the MCV item or the Const value is NULL we can treat
1711+
* this as a mismatch. We must not call the operator because
1712+
* of strictness.
1713+
*/
1714+
if (item->isnull[idx] || cst->constisnull)
1715+
{
1716+
matches[i] = RESULT_MERGE(matches[i], is_or, false);
1717+
continue;
1718+
}
1719+
1720+
/*
1721+
* Skip MCV items that can't change result in the bitmap.
1722+
* Once the value gets false for AND-lists, or true for
1723+
* OR-lists, we don't need to look at more clauses.
1724+
*/
1725+
if (RESULT_IS_FINAL(matches[i], is_or))
1726+
continue;
1727+
1728+
for (j = 0; j < num_elems; j++)
1729+
{
1730+
Datum elem_value = elem_values[j];
1731+
bool elem_isnull = elem_nulls[j];
1732+
bool elem_match;
1733+
1734+
/* NULL values always evaluate as not matching. */
1735+
if (elem_isnull)
1736+
{
1737+
match = RESULT_MERGE(match, expr->useOr, false);
1738+
continue;
1739+
}
1740+
1741+
/*
1742+
* Stop evaluating the array elements once we reach
1743+
* match value that can't change - ALL() is the same
1744+
* as AND-list, ANY() is the same as OR-list.
1745+
*/
1746+
if (RESULT_IS_FINAL(match, expr->useOr))
1747+
break;
1748+
1749+
elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1750+
var->varcollid,
1751+
item->values[idx],
1752+
elem_value));
1753+
1754+
match = RESULT_MERGE(match, expr->useOr, elem_match);
1755+
}
1756+
1757+
/* update the match bitmap with the result */
1758+
matches[i] = RESULT_MERGE(matches[i], is_or, match);
1759+
}
1760+
}
1761+
}
16551762
else if (IsA(clause, NullTest))
16561763
{
16571764
NullTest *expr = (NullTest *) clause;

src/include/statistics/extended_stats_internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
9696
TupleDesc tdesc, MultiSortSupport mss,
9797
int numattrs, AttrNumber *attnums);
9898

99-
extern bool examine_opclause_expression(OpExpr *expr, Var **varp,
100-
Const **cstp, bool *varonleftp);
99+
extern bool examine_clause_args(List *args, Var **varp,
100+
Const **cstp, bool *varonleftp);
101101

102102
extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
103103
StatisticExtInfo *stat,

src/test/regress/expected/stats_ext.out

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
827827
343 | 200
828828
(1 row)
829829

830+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
831+
estimated | actual
832+
-----------+--------
833+
8 | 200
834+
(1 row)
835+
836+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
837+
estimated | actual
838+
-----------+--------
839+
8 | 200
840+
(1 row)
841+
842+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
843+
estimated | actual
844+
-----------+--------
845+
26 | 150
846+
(1 row)
847+
848+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
849+
estimated | actual
850+
-----------+--------
851+
10 | 100
852+
(1 row)
853+
854+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
855+
estimated | actual
856+
-----------+--------
857+
1 | 100
858+
(1 row)
859+
830860
-- create statistics
831861
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
832862
ANALYZE mcv_lists;
@@ -872,6 +902,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
872902
200 | 200
873903
(1 row)
874904

905+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
906+
estimated | actual
907+
-----------+--------
908+
200 | 200
909+
(1 row)
910+
911+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
912+
estimated | actual
913+
-----------+--------
914+
200 | 200
915+
(1 row)
916+
917+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
918+
estimated | actual
919+
-----------+--------
920+
150 | 150
921+
(1 row)
922+
923+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
924+
estimated | actual
925+
-----------+--------
926+
100 | 100
927+
(1 row)
928+
929+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
930+
estimated | actual
931+
-----------+--------
932+
100 | 100
933+
(1 row)
934+
875935
-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
876936
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
877937
estimated | actual

src/test/regress/sql/stats_ext.sql

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
461461

462462
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
463463

464+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
465+
466+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
467+
468+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
469+
470+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
471+
472+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
473+
464474
-- create statistics
465475
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
466476

@@ -480,6 +490,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
480490

481491
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
482492

493+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
494+
495+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
496+
497+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
498+
499+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
500+
501+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
502+
483503
-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
484504
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
485505

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy