Skip to content

Commit 9fdb675

Browse files
committed
Faster partition pruning
Add a new module backend/partitioning/partprune.c, implementing a more sophisticated algorithm for partition pruning. The new module uses each partition's "boundinfo" for pruning instead of constraint exclusion, based on an idea proposed by Robert Haas of a "pruning program": a list of steps generated from the query quals which are run iteratively to obtain a list of partitions that must be scanned in order to satisfy those quals. At present, this targets planner-time partition pruning, but there exist further patches to apply partition pruning at execution time as well. This commit also moves some definitions from include/catalog/partition.h to a new file include/partitioning/partbounds.h, in an attempt to rationalize partitioning related code. Authors: Amit Langote, David Rowley, Dilip Kumar Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen. Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
1 parent 11523e8 commit 9fdb675

File tree

27 files changed

+3994
-416
lines changed

27 files changed

+3994
-416
lines changed

src/backend/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ top_builddir = ../..
1818
include $(top_builddir)/src/Makefile.global
1919

2020
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
21-
main nodes optimizer port postmaster regex replication rewrite \
21+
main nodes optimizer partitioning port postmaster \
22+
regex replication rewrite \
2223
statistics storage tcop tsearch utils $(top_builddir)/src/timezone \
2324
jit
2425

src/backend/catalog/partition.c

Lines changed: 16 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "optimizer/prep.h"
4242
#include "optimizer/var.h"
4343
#include "parser/parse_coerce.h"
44+
#include "partitioning/partbounds.h"
4445
#include "rewrite/rewriteManip.h"
4546
#include "storage/lmgr.h"
4647
#include "utils/array.h"
@@ -55,89 +56,6 @@
5556
#include "utils/ruleutils.h"
5657
#include "utils/syscache.h"
5758

58-
/*
59-
* Information about bounds of a partitioned relation
60-
*
61-
* A list partition datum that is known to be NULL is never put into the
62-
* datums array. Instead, it is tracked using the null_index field.
63-
*
64-
* In the case of range partitioning, ndatums will typically be far less than
65-
* 2 * nparts, because a partition's upper bound and the next partition's lower
66-
* bound are the same in most common cases, and we only store one of them (the
67-
* upper bound). In case of hash partitioning, ndatums will be same as the
68-
* number of partitions.
69-
*
70-
* For range and list partitioned tables, datums is an array of datum-tuples
71-
* with key->partnatts datums each. For hash partitioned tables, it is an array
72-
* of datum-tuples with 2 datums, modulus and remainder, corresponding to a
73-
* given partition.
74-
*
75-
* The datums in datums array are arranged in increasing order as defined by
76-
* functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp() and
77-
* qsort_partition_hbound_cmp() for range, list and hash partitioned tables
78-
* respectively. For range and list partitions this simply means that the
79-
* datums in the datums array are arranged in increasing order as defined by
80-
* the partition key's operator classes and collations.
81-
*
82-
* In the case of list partitioning, the indexes array stores one entry for
83-
* every datum, which is the index of the partition that accepts a given datum.
84-
* In case of range partitioning, it stores one entry per distinct range
85-
* datum, which is the index of the partition for which a given datum
86-
* is an upper bound. In the case of hash partitioning, the number of the
87-
* entries in the indexes array is same as the greatest modulus amongst all
88-
* partitions. For a given partition key datum-tuple, the index of the
89-
* partition which would accept that datum-tuple would be given by the entry
90-
* pointed by remainder produced when hash value of the datum-tuple is divided
91-
* by the greatest modulus.
92-
*/
93-
94-
typedef struct PartitionBoundInfoData
95-
{
96-
char strategy; /* hash, list or range? */
97-
int ndatums; /* Length of the datums following array */
98-
Datum **datums;
99-
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
100-
* NULL for hash and list partitioned
101-
* tables */
102-
int *indexes; /* Partition indexes */
103-
int null_index; /* Index of the null-accepting partition; -1
104-
* if there isn't one */
105-
int default_index; /* Index of the default partition; -1 if there
106-
* isn't one */
107-
} PartitionBoundInfoData;
108-
109-
#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
110-
#define partition_bound_has_default(bi) ((bi)->default_index != -1)
111-
112-
/*
113-
* When qsort'ing partition bounds after reading from the catalog, each bound
114-
* is represented with one of the following structs.
115-
*/
116-
117-
/* One bound of a hash partition */
118-
typedef struct PartitionHashBound
119-
{
120-
int modulus;
121-
int remainder;
122-
int index;
123-
} PartitionHashBound;
124-
125-
/* One value coming from some (index'th) list partition */
126-
typedef struct PartitionListValue
127-
{
128-
int index;
129-
Datum value;
130-
} PartitionListValue;
131-
132-
/* One bound of a range partition */
133-
typedef struct PartitionRangeBound
134-
{
135-
int index;
136-
Datum *datums; /* range bound datums */
137-
PartitionRangeDatumKind *kind; /* the kind of each datum */
138-
bool lower; /* this is the lower (vs upper) bound */
139-
} PartitionRangeBound;
140-
14159

14260
static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
14361
static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
@@ -173,29 +91,9 @@ static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
17391
Oid *partcollation, Datum *datums1,
17492
PartitionRangeDatumKind *kind1, bool lower1,
17593
PartitionRangeBound *b2);
176-
static int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
177-
Oid *partcollation,
178-
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
179-
Datum *tuple_datums, int n_tuple_datums);
180-
181-
static int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
182-
PartitionBoundInfo boundinfo,
183-
Datum value, bool *is_equal);
184-
static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
185-
Oid *partcollation,
186-
PartitionBoundInfo boundinfo,
187-
PartitionRangeBound *probe, bool *is_equal);
188-
static int partition_range_datum_bsearch(FmgrInfo *partsupfunc,
189-
Oid *partcollation,
190-
PartitionBoundInfo boundinfo,
191-
int nvalues, Datum *values, bool *is_equal);
192-
static int partition_hash_bsearch(PartitionBoundInfo boundinfo,
193-
int modulus, int remainder);
19494

19595
static int get_partition_bound_num_indexes(PartitionBoundInfo b);
196-
static int get_greatest_modulus(PartitionBoundInfo b);
197-
static uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
198-
Datum *values, bool *isnull);
96+
19997

20098
/*
20199
* RelationBuildPartitionDesc
@@ -765,13 +663,13 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
765663

766664
if (b1->strategy == PARTITION_STRATEGY_HASH)
767665
{
768-
int greatest_modulus = get_greatest_modulus(b1);
666+
int greatest_modulus = get_hash_partition_greatest_modulus(b1);
769667

770668
/*
771669
* If two hash partitioned tables have different greatest moduli,
772670
* their partition schemes don't match.
773671
*/
774-
if (greatest_modulus != get_greatest_modulus(b2))
672+
if (greatest_modulus != get_hash_partition_greatest_modulus(b2))
775673
return false;
776674

777675
/*
@@ -1029,7 +927,7 @@ check_new_partition_bound(char *relname, Relation parent,
1029927
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1030928
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
1031929

1032-
greatest_modulus = get_greatest_modulus(boundinfo);
930+
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
1033931
remainder = spec->remainder;
1034932

1035933
/*
@@ -1620,7 +1518,6 @@ get_partition_qual_relid(Oid relid)
16201518
return result;
16211519
}
16221520

1623-
/* Module-local functions */
16241521

16251522
/*
16261523
* get_partition_operator
@@ -2637,7 +2534,7 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
26372534
case PARTITION_STRATEGY_HASH:
26382535
{
26392536
PartitionBoundInfo boundinfo = partdesc->boundinfo;
2640-
int greatest_modulus = get_greatest_modulus(boundinfo);
2537+
int greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
26412538
uint64 rowHash = compute_hash_value(key->partnatts,
26422539
key->partsupfunc,
26432540
values, isnull);
@@ -2971,7 +2868,7 @@ partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
29712868
* of attributes resp.
29722869
*
29732870
*/
2974-
static int32
2871+
int32
29752872
partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
29762873
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
29772874
Datum *tuple_datums, int n_tuple_datums)
@@ -3005,7 +2902,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
30052902
* *is_equal is set to true if the bound datum at the returned index is equal
30062903
* to the input value.
30072904
*/
3008-
static int
2905+
int
30092906
partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
30102907
PartitionBoundInfo boundinfo,
30112908
Datum value, bool *is_equal)
@@ -3048,7 +2945,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
30482945
* *is_equal is set to true if the range bound at the returned index is equal
30492946
* to the input range bound
30502947
*/
3051-
static int
2948+
int
30522949
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
30532950
Oid *partcollation,
30542951
PartitionBoundInfo boundinfo,
@@ -3093,7 +2990,7 @@ partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
30932990
* *is_equal is set to true if the range bound at the returned index is equal
30942991
* to the input tuple.
30952992
*/
3096-
static int
2993+
int
30972994
partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
30982995
PartitionBoundInfo boundinfo,
30992996
int nvalues, Datum *values, bool *is_equal)
@@ -3136,7 +3033,7 @@ partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
31363033
* less than or equal to the given (modulus, remainder) pair or -1 if
31373034
* all of them are greater
31383035
*/
3139-
static int
3036+
int
31403037
partition_hash_bsearch(PartitionBoundInfo boundinfo,
31413038
int modulus, int remainder)
31423039
{
@@ -3294,7 +3191,7 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
32943191
* The number of the entries in the indexes array is same as the
32953192
* greatest modulus.
32963193
*/
3297-
num_indexes = get_greatest_modulus(bound);
3194+
num_indexes = get_hash_partition_greatest_modulus(bound);
32983195
break;
32993196

33003197
case PARTITION_STRATEGY_LIST:
@@ -3315,14 +3212,14 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
33153212
}
33163213

33173214
/*
3318-
* get_greatest_modulus
3215+
* get_hash_partition_greatest_modulus
33193216
*
33203217
* Returns the greatest modulus of the hash partition bound. The greatest
33213218
* modulus will be at the end of the datums array because hash partitions are
33223219
* arranged in the ascending order of their modulus and remainders.
33233220
*/
3324-
static int
3325-
get_greatest_modulus(PartitionBoundInfo bound)
3221+
int
3222+
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
33263223
{
33273224
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
33283225
Assert(bound->datums && bound->ndatums > 0);
@@ -3336,7 +3233,7 @@ get_greatest_modulus(PartitionBoundInfo bound)
33363233
*
33373234
* Compute the hash value for given not null partition key values.
33383235
*/
3339-
static uint64
3236+
uint64
33403237
compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
33413238
Datum *values, bool *isnull)
33423239
{

src/backend/nodes/copyfuncs.c

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2150,6 +2150,38 @@ _copyMergeAction(const MergeAction *from)
21502150
return newnode;
21512151
}
21522152

2153+
/*
2154+
* _copyPartitionPruneStepOp
2155+
*/
2156+
static PartitionPruneStepOp *
2157+
_copyPartitionPruneStepOp(const PartitionPruneStepOp *from)
2158+
{
2159+
PartitionPruneStepOp *newnode = makeNode(PartitionPruneStepOp);
2160+
2161+
COPY_SCALAR_FIELD(step.step_id);
2162+
COPY_SCALAR_FIELD(opstrategy);
2163+
COPY_NODE_FIELD(exprs);
2164+
COPY_NODE_FIELD(cmpfns);
2165+
COPY_BITMAPSET_FIELD(nullkeys);
2166+
2167+
return newnode;
2168+
}
2169+
2170+
/*
2171+
* _copyPartitionPruneStepCombine
2172+
*/
2173+
static PartitionPruneStepCombine *
2174+
_copyPartitionPruneStepCombine(const PartitionPruneStepCombine *from)
2175+
{
2176+
PartitionPruneStepCombine *newnode = makeNode(PartitionPruneStepCombine);
2177+
2178+
COPY_SCALAR_FIELD(step.step_id);
2179+
COPY_SCALAR_FIELD(combineOp);
2180+
COPY_NODE_FIELD(source_stepids);
2181+
2182+
return newnode;
2183+
}
2184+
21532185
/* ****************************************************************
21542186
* relation.h copy functions
21552187
*
@@ -2277,21 +2309,6 @@ _copyAppendRelInfo(const AppendRelInfo *from)
22772309
return newnode;
22782310
}
22792311

2280-
/*
2281-
* _copyPartitionedChildRelInfo
2282-
*/
2283-
static PartitionedChildRelInfo *
2284-
_copyPartitionedChildRelInfo(const PartitionedChildRelInfo *from)
2285-
{
2286-
PartitionedChildRelInfo *newnode = makeNode(PartitionedChildRelInfo);
2287-
2288-
COPY_SCALAR_FIELD(parent_relid);
2289-
COPY_NODE_FIELD(child_rels);
2290-
COPY_SCALAR_FIELD(part_cols_updated);
2291-
2292-
return newnode;
2293-
}
2294-
22952312
/*
22962313
* _copyPlaceHolderInfo
22972314
*/
@@ -5076,6 +5093,12 @@ copyObjectImpl(const void *from)
50765093
case T_MergeAction:
50775094
retval = _copyMergeAction(from);
50785095
break;
5096+
case T_PartitionPruneStepOp:
5097+
retval = _copyPartitionPruneStepOp(from);
5098+
break;
5099+
case T_PartitionPruneStepCombine:
5100+
retval = _copyPartitionPruneStepCombine(from);
5101+
break;
50795102

50805103
/*
50815104
* RELATION NODES
@@ -5095,9 +5118,6 @@ copyObjectImpl(const void *from)
50955118
case T_AppendRelInfo:
50965119
retval = _copyAppendRelInfo(from);
50975120
break;
5098-
case T_PartitionedChildRelInfo:
5099-
retval = _copyPartitionedChildRelInfo(from);
5100-
break;
51015121
case T_PlaceHolderInfo:
51025122
retval = _copyPlaceHolderInfo(from);
51035123
break;

src/backend/nodes/equalfuncs.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -915,16 +915,6 @@ _equalAppendRelInfo(const AppendRelInfo *a, const AppendRelInfo *b)
915915
return true;
916916
}
917917

918-
static bool
919-
_equalPartitionedChildRelInfo(const PartitionedChildRelInfo *a, const PartitionedChildRelInfo *b)
920-
{
921-
COMPARE_SCALAR_FIELD(parent_relid);
922-
COMPARE_NODE_FIELD(child_rels);
923-
COMPARE_SCALAR_FIELD(part_cols_updated);
924-
925-
return true;
926-
}
927-
928918
static bool
929919
_equalPlaceHolderInfo(const PlaceHolderInfo *a, const PlaceHolderInfo *b)
930920
{
@@ -3230,9 +3220,6 @@ equal(const void *a, const void *b)
32303220
case T_AppendRelInfo:
32313221
retval = _equalAppendRelInfo(a, b);
32323222
break;
3233-
case T_PartitionedChildRelInfo:
3234-
retval = _equalPartitionedChildRelInfo(a, b);
3235-
break;
32363223
case T_PlaceHolderInfo:
32373224
retval = _equalPlaceHolderInfo(a, b);
32383225
break;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy