Skip to content

Commit 30833ba

Browse files
committed
Expand partitioned tables in PartDesc order.
Previously, we expanded the inheritance hierarchy in the order in which find_all_inheritors had locked the tables, but that turns out to block quite a bit of useful optimization. For example, a partition-wise join can't count on two tables with matching bounds to get expanded in the same order.

Where possible, this change results in expanding partitioned tables in *bound* order. Bound order isn't well-defined for a list-partitioned table with a null-accepting partition or for a list-partitioned table where the bounds for a single partition are interleaved with other partitions. However, when expansion in bound order is possible, it opens up further opportunities for optimization, such as strength-reducing MergeAppend to Append when the expansion order matches the desired sort order.

Patch by me, with cosmetic revisions by Ashutosh Bapat.

Discussion: http://postgr.es/m/CA+TgmoZrKj7kEzcMSum3aXV4eyvvbh9WD=c6m=002WMheDyE3A@mail.gmail.com
1 parent 6708e44 commit 30833ba

File tree

2 files changed

+220
-112
lines changed

2 files changed

+220
-112
lines changed

src/backend/optimizer/prep/prepunion.c

Lines changed: 218 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "access/heapam.h"
3434
#include "access/htup_details.h"
3535
#include "access/sysattr.h"
36+
#include "catalog/partition.h"
3637
#include "catalog/pg_inherits_fn.h"
3738
#include "catalog/pg_type.h"
3839
#include "miscadmin.h"
@@ -100,6 +101,19 @@ static List *generate_append_tlist(List *colTypes, List *colCollations,
100101
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
101102
static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte,
102103
Index rti);
104+
static void expand_partitioned_rtentry(PlannerInfo *root,
105+
RangeTblEntry *parentrte,
106+
Index parentRTindex, Relation parentrel,
107+
PlanRowMark *parentrc, PartitionDesc partdesc,
108+
LOCKMODE lockmode,
109+
bool *has_child, List **appinfos,
110+
List **partitioned_child_rels);
111+
static void expand_single_inheritance_child(PlannerInfo *root,
112+
RangeTblEntry *parentrte,
113+
Index parentRTindex, Relation parentrel,
114+
PlanRowMark *parentrc, Relation childrel,
115+
bool *has_child, List **appinfos,
116+
List **partitioned_child_rels);
103117
static void make_inh_translation_list(Relation oldrelation,
104118
Relation newrelation,
105119
Index newvarno,
@@ -1455,131 +1469,62 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
14551469
/* Scan the inheritance set and expand it */
14561470
appinfos = NIL;
14571471
has_child = false;
1458-
foreach(l, inhOIDs)
1472+
if (RelationGetPartitionDesc(oldrelation) != NULL)
14591473
{
1460-
Oid childOID = lfirst_oid(l);
1461-
Relation newrelation;
1462-
RangeTblEntry *childrte;
1463-
Index childRTindex;
1464-
AppendRelInfo *appinfo;
1465-
1466-
/* Open rel if needed; we already have required locks */
1467-
if (childOID != parentOID)
1468-
newrelation = heap_open(childOID, NoLock);
1469-
else
1470-
newrelation = oldrelation;
1471-
1472-
/*
1473-
* It is possible that the parent table has children that are temp
1474-
* tables of other backends. We cannot safely access such tables
1475-
* (because of buffering issues), and the best thing to do seems to be
1476-
* to silently ignore them.
1477-
*/
1478-
if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
1479-
{
1480-
heap_close(newrelation, lockmode);
1481-
continue;
1482-
}
1483-
14841474
/*
1485-
* Build an RTE for the child, and attach to query's rangetable list.
1486-
* We copy most fields of the parent's RTE, but replace relation OID
1487-
* and relkind, and set inh = false. Also, set requiredPerms to zero
1488-
* since all required permissions checks are done on the original RTE.
1489-
* Likewise, set the child's securityQuals to empty, because we only
1490-
* want to apply the parent's RLS conditions regardless of what RLS
1491-
* properties individual children may have. (This is an intentional
1492-
* choice to make inherited RLS work like regular permissions checks.)
1493-
* The parent securityQuals will be propagated to children along with
1494-
* other base restriction clauses, so we don't need to do it here.
1475+
* If this table has partitions, recursively expand them in the order
1476+
* in which they appear in the PartitionDesc. But first, expand the
1477+
* parent itself.
14951478
*/
1496-
childrte = copyObject(rte);
1497-
childrte->relid = childOID;
1498-
childrte->relkind = newrelation->rd_rel->relkind;
1499-
childrte->inh = false;
1500-
childrte->requiredPerms = 0;
1501-
childrte->securityQuals = NIL;
1502-
parse->rtable = lappend(parse->rtable, childrte);
1503-
childRTindex = list_length(parse->rtable);
1504-
1479+
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
1480+
oldrelation,
1481+
&has_child, &appinfos,
1482+
&partitioned_child_rels);
1483+
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
1484+
RelationGetPartitionDesc(oldrelation),
1485+
lockmode,
1486+
&has_child, &appinfos,
1487+
&partitioned_child_rels);
1488+
}
1489+
else
1490+
{
15051491
/*
1506-
* Build an AppendRelInfo for this parent and child, unless the child
1507-
* is a partitioned table.
1492+
* This table has no partitions. Expand any plain inheritance
1493+
* children in the order the OIDs were returned by
1494+
* find_all_inheritors.
15081495
*/
1509-
if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
1496+
foreach(l, inhOIDs)
15101497
{
1511-
/* Remember if we saw a real child. */
1498+
Oid childOID = lfirst_oid(l);
1499+
Relation newrelation;
1500+
1501+
/* Open rel if needed; we already have required locks */
15121502
if (childOID != parentOID)
1513-
has_child = true;
1514-
1515-
appinfo = makeNode(AppendRelInfo);
1516-
appinfo->parent_relid = rti;
1517-
appinfo->child_relid = childRTindex;
1518-
appinfo->parent_reltype = oldrelation->rd_rel->reltype;
1519-
appinfo->child_reltype = newrelation->rd_rel->reltype;
1520-
make_inh_translation_list(oldrelation, newrelation, childRTindex,
1521-
&appinfo->translated_vars);
1522-
appinfo->parent_reloid = parentOID;
1523-
appinfos = lappend(appinfos, appinfo);
1503+
newrelation = heap_open(childOID, NoLock);
1504+
else
1505+
newrelation = oldrelation;
15241506

15251507
/*
1526-
* Translate the column permissions bitmaps to the child's attnums
1527-
* (we have to build the translated_vars list before we can do
1528-
* this). But if this is the parent table, leave copyObject's
1529-
* result alone.
1530-
*
1531-
* Note: we need to do this even though the executor won't run any
1532-
* permissions checks on the child RTE. The
1533-
* insertedCols/updatedCols bitmaps may be examined for
1534-
* trigger-firing purposes.
1508+
* It is possible that the parent table has children that are temp
1509+
* tables of other backends. We cannot safely access such tables
1510+
* (because of buffering issues), and the best thing to do seems
1511+
* to be to silently ignore them.
15351512
*/
1536-
if (childOID != parentOID)
1513+
if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation))
15371514
{
1538-
childrte->selectedCols = translate_col_privs(rte->selectedCols,
1539-
appinfo->translated_vars);
1540-
childrte->insertedCols = translate_col_privs(rte->insertedCols,
1541-
appinfo->translated_vars);
1542-
childrte->updatedCols = translate_col_privs(rte->updatedCols,
1543-
appinfo->translated_vars);
1515+
heap_close(newrelation, lockmode);
1516+
continue;
15441517
}
1545-
}
1546-
else
1547-
partitioned_child_rels = lappend_int(partitioned_child_rels,
1548-
childRTindex);
15491518

1550-
/*
1551-
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
1552-
*/
1553-
if (oldrc)
1554-
{
1555-
PlanRowMark *newrc = makeNode(PlanRowMark);
1556-
1557-
newrc->rti = childRTindex;
1558-
newrc->prti = rti;
1559-
newrc->rowmarkId = oldrc->rowmarkId;
1560-
/* Reselect rowmark type, because relkind might not match parent */
1561-
newrc->markType = select_rowmark_type(childrte, oldrc->strength);
1562-
newrc->allMarkTypes = (1 << newrc->markType);
1563-
newrc->strength = oldrc->strength;
1564-
newrc->waitPolicy = oldrc->waitPolicy;
1565-
1566-
/*
1567-
* We mark RowMarks for partitioned child tables as parent
1568-
* RowMarks so that the executor ignores them (except their
1569-
* existence means that the child tables be locked using
1570-
* appropriate mode).
1571-
*/
1572-
newrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
1573-
1574-
/* Include child's rowmark type in parent's allMarkTypes */
1575-
oldrc->allMarkTypes |= newrc->allMarkTypes;
1519+
expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc,
1520+
newrelation,
1521+
&has_child, &appinfos,
1522+
&partitioned_child_rels);
15761523

1577-
root->rowMarks = lappend(root->rowMarks, newrc);
1524+
/* Close child relations, but keep locks */
1525+
if (childOID != parentOID)
1526+
heap_close(newrelation, NoLock);
15781527
}
1579-
1580-
/* Close child relations, but keep locks */
1581-
if (childOID != parentOID)
1582-
heap_close(newrelation, NoLock);
15831528
}
15841529

15851530
heap_close(oldrelation, NoLock);
@@ -1620,6 +1565,169 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
16201565
root->append_rel_list = list_concat(root->append_rel_list, appinfos);
16211566
}
16221567

1568+
/*
* expand_partitioned_rtentry
* Expand the partitions listed in one PartitionDesc, descending into
* sub-partitioned children.
*
* Children are visited in the order they appear in partdesc->oids. Each
* child that is not a temp table of another backend is handed to
* expand_single_inheritance_child; if that child is itself a partitioned
* table, we immediately recurse on its own PartitionDesc before moving on to
* the next sibling, so the overall expansion is depth-first.
*
* Note that parentrte, parentRTindex, parentrel and parentrc are passed
* through unchanged when recursing: every descendant, at any depth, is
* expanded as a child of the same parent the caller supplied, not of the
* intermediate partitioned table.
*
* has_child, appinfos and partitioned_child_rels are accumulators owned by
* the caller; they are only forwarded to expand_single_inheritance_child.
*/
static void
1569+
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
1570+
Index parentRTindex, Relation parentrel,
1571+
PlanRowMark *parentrc, PartitionDesc partdesc,
1572+
LOCKMODE lockmode,
1573+
bool *has_child, List **appinfos,
1574+
List **partitioned_child_rels)
1575+
{
1576+
int i;
1577+
1578+
/* This function is recursive; presumably this guards against runaway nesting depth */
check_stack_depth();
1579+
1580+
for (i = 0; i < partdesc->nparts; i++)
1581+
{
1582+
Oid childOID = partdesc->oids[i];
1583+
Relation childrel;
1584+
1585+
/* Open rel; we already have required locks */
1586+
childrel = heap_open(childOID, NoLock);
1587+
1588+
/* As in expand_inherited_rtentry, skip non-local temp tables */
1589+
if (RELATION_IS_OTHER_TEMP(childrel))
1590+
{
1591+
/*
* NOTE(review): closed with lockmode rather than NoLock, mirroring the
* same path in expand_inherited_rtentry -- presumably this drops the lock
* on the table we are ignoring; confirm.
*/
heap_close(childrel, lockmode);
1592+
continue;
1593+
}
1594+
1595+
expand_single_inheritance_child(root, parentrte, parentRTindex,
1596+
parentrel, parentrc, childrel,
1597+
has_child, appinfos,
1598+
partitioned_child_rels);
1599+
1600+
/* If this child is itself partitioned, recurse */
1601+
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1602+
expand_partitioned_rtentry(root, parentrte, parentRTindex,
1603+
parentrel, parentrc,
1604+
RelationGetPartitionDesc(childrel),
1605+
lockmode,
1606+
has_child, appinfos,
1607+
partitioned_child_rels);
1608+
1609+
/* Close child relation, but keep locks */
1610+
heap_close(childrel, NoLock);
1611+
}
1612+
}
1613+
1614+
/*
1615+
* expand_single_inheritance_child
1616+
* Build the planner data structures for one inheritance child.
1617+
*
1618+
* The caller must already have skipped temp tables of other backends. We
1619+
* always add a child RangeTblEntry, then either an AppendRelInfo (setting
1620+
* "has_child" if the child is not the parent itself) or, for a partitioned
1621+
* child, a partitioned_child_rels entry; plus a PlanRowMark if parentrc is set.
1622+
*/
1623+
static void
1624+
expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
1625+
Index parentRTindex, Relation parentrel,
1626+
PlanRowMark *parentrc, Relation childrel,
1627+
bool *has_child, List **appinfos,
1628+
List **partitioned_child_rels)
1629+
{
1630+
Query *parse = root->parse;
1631+
Oid parentOID = RelationGetRelid(parentrel);
1632+
Oid childOID = RelationGetRelid(childrel);
1633+
RangeTblEntry *childrte;
1634+
Index childRTindex;
1635+
AppendRelInfo *appinfo;
1636+
1637+
/*
1638+
* Build an RTE for the child, and attach to query's rangetable list. We
1639+
* copy most fields of the parent's RTE, but replace relation OID and
1640+
* relkind, and set inh = false. Also, set requiredPerms to zero since
1641+
* all required permissions checks are done on the original RTE. Likewise,
1642+
* set the child's securityQuals to empty, because we only want to apply
1643+
* the parent's RLS conditions regardless of what RLS properties
1644+
* individual children may have. (This is an intentional choice to make
1645+
* inherited RLS work like regular permissions checks.) The parent
1646+
* securityQuals will be propagated to children along with other base
1647+
* restriction clauses, so we don't need to do it here.
1648+
*/
1649+
childrte = copyObject(parentrte);
1650+
childrte->relid = childOID;
1651+
childrte->relkind = childrel->rd_rel->relkind;
1652+
childrte->inh = false;
1653+
childrte->requiredPerms = 0;
1654+
childrte->securityQuals = NIL;
1655+
parse->rtable = lappend(parse->rtable, childrte);
1656+
/* The entry was just appended, so its index is the new list length */
childRTindex = list_length(parse->rtable);
1657+
1658+
/*
1659+
* Build an AppendRelInfo for this parent and child, unless the child is a
1660+
* partitioned table.
1661+
*/
1662+
if (childrte->relkind != RELKIND_PARTITIONED_TABLE)
1663+
{
1664+
/* Remember if we saw a real child. */
1665+
if (childOID != parentOID)
1666+
*has_child = true;
1667+
1668+
appinfo = makeNode(AppendRelInfo);
1669+
appinfo->parent_relid = parentRTindex;
1670+
appinfo->child_relid = childRTindex;
1671+
appinfo->parent_reltype = parentrel->rd_rel->reltype;
1672+
appinfo->child_reltype = childrel->rd_rel->reltype;
1673+
make_inh_translation_list(parentrel, childrel, childRTindex,
1674+
&appinfo->translated_vars);
1675+
appinfo->parent_reloid = parentOID;
1676+
*appinfos = lappend(*appinfos, appinfo);
1677+
1678+
/*
1679+
* Translate the column permissions bitmaps to the child's attnums (we
1680+
* have to build the translated_vars list before we can do this). But
1681+
* if this is the parent table, leave copyObject's result alone.
1682+
*
1683+
* Note: we need to do this even though the executor won't run any
1684+
* permissions checks on the child RTE. The insertedCols/updatedCols
1685+
* bitmaps may be examined for trigger-firing purposes.
1686+
*/
1687+
if (childOID != parentOID)
1688+
{
1689+
childrte->selectedCols = translate_col_privs(parentrte->selectedCols,
1690+
appinfo->translated_vars);
1691+
childrte->insertedCols = translate_col_privs(parentrte->insertedCols,
1692+
appinfo->translated_vars);
1693+
childrte->updatedCols = translate_col_privs(parentrte->updatedCols,
1694+
appinfo->translated_vars);
1695+
}
1696+
}
1697+
else
1698+
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
1699+
childRTindex);
1700+
1701+
/*
1702+
* Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
1703+
*/
1704+
if (parentrc)
1705+
{
1706+
PlanRowMark *childrc = makeNode(PlanRowMark);
1707+
1708+
childrc->rti = childRTindex;
1709+
childrc->prti = parentRTindex;
1710+
childrc->rowmarkId = parentrc->rowmarkId;
1711+
/* Reselect rowmark type, because relkind might not match parent */
1712+
childrc->markType = select_rowmark_type(childrte, parentrc->strength);
1713+
childrc->allMarkTypes = (1 << childrc->markType);
1714+
childrc->strength = parentrc->strength;
1715+
childrc->waitPolicy = parentrc->waitPolicy;
1716+
1717+
/*
1718+
* We mark RowMarks for partitioned child tables as parent RowMarks so
1719+
* that the executor ignores them (except their existence means that
1720+
* the child tables be locked using appropriate mode).
1721+
*/
1722+
childrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);
1723+
1724+
/* Include child's rowmark type in parent's allMarkTypes */
1725+
parentrc->allMarkTypes |= childrc->allMarkTypes;
1726+
1727+
root->rowMarks = lappend(root->rowMarks, childrc);
1728+
}
1729+
}
1730+
16231731
/*
16241732
* make_inh_translation_list
16251733
* Build the list of translations from parent Vars to child Vars for

src/test/regress/expected/insert.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,12 @@ select tableoid::regclass, * from list_parted;
278278
-------------+----+----
279279
part_aa_bb | aA |
280280
part_cc_dd | cC | 1
281-
part_null | | 0
282-
part_null | | 1
283281
part_ee_ff1 | ff | 1
284282
part_ee_ff1 | EE | 1
285283
part_ee_ff2 | ff | 11
286284
part_ee_ff2 | EE | 10
285+
part_null | | 0
286+
part_null | | 1
287287
(8 rows)
288288

289289
-- some more tests to exercise tuple-routing with multi-level partitioning

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy