Skip to content

Commit 8224de4

Browse files
committed
Indexes with INCLUDE columns and their support in B-tree
This patch introduces INCLUDE clause to index definition. This clause specifies a list of columns which will be included as a non-key part in the index. The INCLUDE columns exist solely to allow more queries to benefit from index-only scans. Also, such columns don't need to have appropriate operator classes. Expressions are not supported as INCLUDE columns since they cannot be used in index-only scans. Index access methods supporting INCLUDE are indicated by amcaninclude flag in IndexAmRoutine. For now, only B-tree indexes support INCLUDE clause. In B-tree indexes INCLUDE columns are truncated from pivot index tuples (tuples located in non-leaf pages and high keys). Therefore, B-tree indexes now might have variable number of attributes. This patch also provides generic facility to support that: pivot tuples contain number of their attributes in t_tid.ip_posid. Free 13th bit of t_info is used for indicating that. This facility will simplify further support of index suffix truncation. The changes of above are backward-compatible, pg_upgrade doesn't need special handling of B-tree indexes for that. Bump catalog version Author: Anastasia Lubennikova with contribition by Alexander Korotkov and me Reviewed by: Peter Geoghegan, Tomas Vondra, Antonin Houska, Jeff Janes, David Rowley, Alexander Korotkov Discussion: https://www.postgresql.org/message-id/flat/56168952.4010101@postgrespro.ru
1 parent 01bb851 commit 8224de4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+2115
-470
lines changed

contrib/amcheck/expected/check_btree.out

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
-- minimal test, basically just verifying that amcheck
22
CREATE TABLE bttest_a(id int8);
33
CREATE TABLE bttest_b(id int8);
4+
CREATE TABLE bttest_multi(id int8, data int8);
45
INSERT INTO bttest_a SELECT * FROM generate_series(1, 100000);
56
INSERT INTO bttest_b SELECT * FROM generate_series(100000, 1, -1);
7+
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
68
CREATE INDEX bttest_a_idx ON bttest_a USING btree (id);
79
CREATE INDEX bttest_b_idx ON bttest_b USING btree (id);
10+
CREATE UNIQUE INDEX bttest_multi_idx ON bttest_multi
11+
USING btree (id) INCLUDE (data);
812
CREATE ROLE bttest_role;
913
-- verify permissions are checked (error due to function not callable)
1014
SET ROLE bttest_role;
@@ -93,8 +97,50 @@ WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx
9397
(0 rows)
9498

9599
COMMIT;
100+
-- normal check outside of xact for index with included columns
101+
SELECT bt_index_check('bttest_multi_idx');
102+
bt_index_check
103+
----------------
104+
105+
(1 row)
106+
107+
-- more expansive test for index with included columns
108+
SELECT bt_index_parent_check('bttest_multi_idx', true);
109+
bt_index_parent_check
110+
-----------------------
111+
112+
(1 row)
113+
114+
SELECT bt_index_parent_check('bttest_multi_idx', true);
115+
bt_index_parent_check
116+
-----------------------
117+
118+
(1 row)
119+
120+
-- repeat same checks with index made by insertions
121+
TRUNCATE bttest_multi;
122+
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
123+
SELECT bt_index_check('bttest_multi_idx');
124+
bt_index_check
125+
----------------
126+
127+
(1 row)
128+
129+
SELECT bt_index_parent_check('bttest_multi_idx', true);
130+
bt_index_parent_check
131+
-----------------------
132+
133+
(1 row)
134+
135+
SELECT bt_index_parent_check('bttest_multi_idx', true);
136+
bt_index_parent_check
137+
-----------------------
138+
139+
(1 row)
140+
96141
-- cleanup
97142
DROP TABLE bttest_a;
98143
DROP TABLE bttest_b;
144+
DROP TABLE bttest_multi;
99145
DROP OWNED BY bttest_role; -- permissions
100146
DROP ROLE bttest_role;

contrib/amcheck/sql/check_btree.sql

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
-- minimal test, basically just verifying that amcheck
22
CREATE TABLE bttest_a(id int8);
33
CREATE TABLE bttest_b(id int8);
4+
CREATE TABLE bttest_multi(id int8, data int8);
45

56
INSERT INTO bttest_a SELECT * FROM generate_series(1, 100000);
67
INSERT INTO bttest_b SELECT * FROM generate_series(100000, 1, -1);
8+
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
79

810
CREATE INDEX bttest_a_idx ON bttest_a USING btree (id);
911
CREATE INDEX bttest_b_idx ON bttest_b USING btree (id);
12+
CREATE UNIQUE INDEX bttest_multi_idx ON bttest_multi
13+
USING btree (id) INCLUDE (data);
1014

1115
CREATE ROLE bttest_role;
1216

@@ -57,8 +61,23 @@ WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx
5761
AND pid = pg_backend_pid();
5862
COMMIT;
5963

64+
-- normal check outside of xact for index with included columns
65+
SELECT bt_index_check('bttest_multi_idx');
66+
-- more expansive test for index with included columns
67+
SELECT bt_index_parent_check('bttest_multi_idx', true);
68+
SELECT bt_index_parent_check('bttest_multi_idx', true);
69+
70+
-- repeat same checks with index made by insertions
71+
TRUNCATE bttest_multi;
72+
INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
73+
SELECT bt_index_check('bttest_multi_idx');
74+
SELECT bt_index_parent_check('bttest_multi_idx', true);
75+
SELECT bt_index_parent_check('bttest_multi_idx', true);
76+
77+
6078
-- cleanup
6179
DROP TABLE bttest_a;
6280
DROP TABLE bttest_b;
81+
DROP TABLE bttest_multi;
6382
DROP OWNED BY bttest_role; -- permissions
6483
DROP ROLE bttest_role;

contrib/amcheck/verify_nbtree.c

Lines changed: 82 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
617617
/* Internal page -- downlink gets leftmost on next level */
618618
itemid = PageGetItemId(state->target, P_FIRSTDATAKEY(opaque));
619619
itup = (IndexTuple) PageGetItem(state->target, itemid);
620-
nextleveldown.leftmost = ItemPointerGetBlockNumber(&(itup->t_tid));
620+
nextleveldown.leftmost = ItemPointerGetBlockNumberNoCheck(&(itup->t_tid));
621621
nextleveldown.level = opaque->btpo.level - 1;
622622
}
623623
else
@@ -722,6 +722,39 @@ bt_target_page_check(BtreeCheckState *state)
722722
elog(DEBUG2, "verifying %u items on %s block %u", max,
723723
P_ISLEAF(topaque) ? "leaf" : "internal", state->targetblock);
724724

725+
726+
/* Check the number of attributes in high key if any */
727+
if (!P_RIGHTMOST(topaque))
728+
{
729+
if (!_bt_check_natts(state->rel, state->target, P_HIKEY))
730+
{
731+
ItemId itemid;
732+
IndexTuple itup;
733+
char *itid,
734+
*htid;
735+
736+
itemid = PageGetItemId(state->target, P_HIKEY);
737+
itup = (IndexTuple) PageGetItem(state->target, itemid);
738+
itid = psprintf("(%u,%u)", state->targetblock, P_HIKEY);
739+
htid = psprintf("(%u,%u)",
740+
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
741+
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
742+
743+
ereport(ERROR,
744+
(errcode(ERRCODE_INDEX_CORRUPTED),
745+
errmsg("wrong number of index tuple attributes for index \"%s\"",
746+
RelationGetRelationName(state->rel)),
747+
errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.",
748+
itid,
749+
BTreeTupGetNAtts(itup, state->rel),
750+
P_ISLEAF(topaque) ? "heap" : "index",
751+
htid,
752+
(uint32) (state->targetlsn >> 32),
753+
(uint32) state->targetlsn)));
754+
}
755+
}
756+
757+
725758
/*
726759
* Loop over page items, starting from first non-highkey item, not high
727760
* key (if any). Also, immediately skip "negative infinity" real item (if
@@ -760,6 +793,30 @@ bt_target_page_check(BtreeCheckState *state)
760793
(uint32) state->targetlsn),
761794
errhint("This could be a torn page problem")));
762795

796+
/* Check the number of index tuple attributes */
797+
if (!_bt_check_natts(state->rel, state->target, offset))
798+
{
799+
char *itid,
800+
*htid;
801+
802+
itid = psprintf("(%u,%u)", state->targetblock, offset);
803+
htid = psprintf("(%u,%u)",
804+
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
805+
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
806+
807+
ereport(ERROR,
808+
(errcode(ERRCODE_INDEX_CORRUPTED),
809+
errmsg("wrong number of index tuple attributes for index \"%s\"",
810+
RelationGetRelationName(state->rel)),
811+
errdetail_internal("Index tid=%s natts=%u points to %s tid=%s page lsn=%X/%X.",
812+
itid,
813+
BTreeTupGetNAtts(itup, state->rel),
814+
P_ISLEAF(topaque) ? "heap" : "index",
815+
htid,
816+
(uint32) (state->targetlsn >> 32),
817+
(uint32) state->targetlsn)));
818+
}
819+
763820
/*
764821
* Don't try to generate scankey using "negative infinity" garbage
765822
* data on internal pages
@@ -802,8 +859,8 @@ bt_target_page_check(BtreeCheckState *state)
802859

803860
itid = psprintf("(%u,%u)", state->targetblock, offset);
804861
htid = psprintf("(%u,%u)",
805-
ItemPointerGetBlockNumber(&(itup->t_tid)),
806-
ItemPointerGetOffsetNumber(&(itup->t_tid)));
862+
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
863+
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
807864

808865
ereport(ERROR,
809866
(errcode(ERRCODE_INDEX_CORRUPTED),
@@ -834,17 +891,17 @@ bt_target_page_check(BtreeCheckState *state)
834891

835892
itid = psprintf("(%u,%u)", state->targetblock, offset);
836893
htid = psprintf("(%u,%u)",
837-
ItemPointerGetBlockNumber(&(itup->t_tid)),
838-
ItemPointerGetOffsetNumber(&(itup->t_tid)));
894+
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
895+
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
839896
nitid = psprintf("(%u,%u)", state->targetblock,
840897
OffsetNumberNext(offset));
841898

842899
/* Reuse itup to get pointed-to heap location of second item */
843900
itemid = PageGetItemId(state->target, OffsetNumberNext(offset));
844901
itup = (IndexTuple) PageGetItem(state->target, itemid);
845902
nhtid = psprintf("(%u,%u)",
846-
ItemPointerGetBlockNumber(&(itup->t_tid)),
847-
ItemPointerGetOffsetNumber(&(itup->t_tid)));
903+
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
904+
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)));
848905

849906
ereport(ERROR,
850907
(errcode(ERRCODE_INDEX_CORRUPTED),
@@ -932,7 +989,7 @@ bt_target_page_check(BtreeCheckState *state)
932989
*/
933990
if (!P_ISLEAF(topaque) && state->readonly)
934991
{
935-
BlockNumber childblock = ItemPointerGetBlockNumber(&(itup->t_tid));
992+
BlockNumber childblock = ItemPointerGetBlockNumberNoCheck(&(itup->t_tid));
936993

937994
bt_downlink_check(state, childblock, skey);
938995
}
@@ -1326,6 +1383,11 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
13261383
* or otherwise varied when or how compression was applied, our assumption
13271384
* would break, leading to false positive reports of corruption. For now,
13281385
* we don't decompress/normalize toasted values as part of fingerprinting.
1386+
*
1387+
* In future, non-pivot index tuples might get use of
1388+
* BT_N_KEYS_OFFSET_MASK. Then binary representation of index tuple linked
1389+
* to particular heap tuple might vary and meeds to be normalized before
1390+
* bloom filter lookup.
13291391
*/
13301392
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
13311393
itup->t_tid = htup->t_self;
@@ -1336,8 +1398,8 @@ bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values,
13361398
ereport(ERROR,
13371399
(errcode(ERRCODE_DATA_CORRUPTED),
13381400
errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
1339-
ItemPointerGetBlockNumber(&(itup->t_tid)),
1340-
ItemPointerGetOffsetNumber(&(itup->t_tid)),
1401+
ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)),
1402+
ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)),
13411403
RelationGetRelationName(state->heaprel),
13421404
RelationGetRelationName(state->rel)),
13431405
!state->readonly
@@ -1368,6 +1430,10 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset)
13681430
* infinity item is either first or second line item, or there is none
13691431
* within page.
13701432
*
1433+
* "Negative infinity" tuple is a special corner case of pivot tuples,
1434+
* it has zero attributes while rest of pivot tuples have nkeyatts number
1435+
* of attributes.
1436+
*
13711437
* Right-most pages don't have a high key, but could be said to
13721438
* conceptually have a "positive infinity" high key. Thus, there is a
13731439
* symmetry between down link items in parent pages, and high keys in
@@ -1391,10 +1457,10 @@ static inline bool
13911457
invariant_leq_offset(BtreeCheckState *state, ScanKey key,
13921458
OffsetNumber upperbound)
13931459
{
1394-
int16 natts = state->rel->rd_rel->relnatts;
1460+
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
13951461
int32 cmp;
13961462

1397-
cmp = _bt_compare(state->rel, natts, key, state->target, upperbound);
1463+
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, upperbound);
13981464

13991465
return cmp <= 0;
14001466
}
@@ -1410,10 +1476,10 @@ static inline bool
14101476
invariant_geq_offset(BtreeCheckState *state, ScanKey key,
14111477
OffsetNumber lowerbound)
14121478
{
1413-
int16 natts = state->rel->rd_rel->relnatts;
1479+
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
14141480
int32 cmp;
14151481

1416-
cmp = _bt_compare(state->rel, natts, key, state->target, lowerbound);
1482+
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, lowerbound);
14171483

14181484
return cmp >= 0;
14191485
}
@@ -1433,10 +1499,10 @@ invariant_leq_nontarget_offset(BtreeCheckState *state,
14331499
Page nontarget, ScanKey key,
14341500
OffsetNumber upperbound)
14351501
{
1436-
int16 natts = state->rel->rd_rel->relnatts;
1502+
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
14371503
int32 cmp;
14381504

1439-
cmp = _bt_compare(state->rel, natts, key, nontarget, upperbound);
1505+
cmp = _bt_compare(state->rel, nkeyatts, key, nontarget, upperbound);
14401506

14411507
return cmp <= 0;
14421508
}

contrib/bloom/blutils.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ blhandler(PG_FUNCTION_ARGS)
120120
amroutine->amclusterable = false;
121121
amroutine->ampredlocks = false;
122122
amroutine->amcanparallel = false;
123+
amroutine->amcaninclude = false;
123124
amroutine->amkeytype = InvalidOid;
124125

125126
amroutine->ambuild = blbuild;

contrib/dblink/dblink.c

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ static remoteConn *getConnectionByName(const char *name);
100100
static HTAB *createConnHash(void);
101101
static void createNewConnection(const char *name, remoteConn *rconn);
102102
static void deleteConnection(const char *name);
103-
static char **get_pkey_attnames(Relation rel, int16 *numatts);
103+
static char **get_pkey_attnames(Relation rel, int16 *indnkeyatts);
104104
static char **get_text_array_contents(ArrayType *array, int *numitems);
105105
static char *get_sql_insert(Relation rel, int *pkattnums, int pknumatts, char **src_pkattvals, char **tgt_pkattvals);
106106
static char *get_sql_delete(Relation rel, int *pkattnums, int pknumatts, char **tgt_pkattvals);
@@ -1493,7 +1493,7 @@ PG_FUNCTION_INFO_V1(dblink_get_pkey);
14931493
Datum
14941494
dblink_get_pkey(PG_FUNCTION_ARGS)
14951495
{
1496-
int16 numatts;
1496+
int16 indnkeyatts;
14971497
char **results;
14981498
FuncCallContext *funcctx;
14991499
int32 call_cntr;
@@ -1519,7 +1519,7 @@ dblink_get_pkey(PG_FUNCTION_ARGS)
15191519
rel = get_rel_from_relname(PG_GETARG_TEXT_PP(0), AccessShareLock, ACL_SELECT);
15201520

15211521
/* get the array of attnums */
1522-
results = get_pkey_attnames(rel, &numatts);
1522+
results = get_pkey_attnames(rel, &indnkeyatts);
15231523

15241524
relation_close(rel, AccessShareLock);
15251525

@@ -1539,9 +1539,9 @@ dblink_get_pkey(PG_FUNCTION_ARGS)
15391539
attinmeta = TupleDescGetAttInMetadata(tupdesc);
15401540
funcctx->attinmeta = attinmeta;
15411541

1542-
if ((results != NULL) && (numatts > 0))
1542+
if ((results != NULL) && (indnkeyatts > 0))
15431543
{
1544-
funcctx->max_calls = numatts;
1544+
funcctx->max_calls = indnkeyatts;
15451545

15461546
/* got results, keep track of them */
15471547
funcctx->user_fctx = results;
@@ -2029,10 +2029,10 @@ dblink_fdw_validator(PG_FUNCTION_ARGS)
20292029
* get_pkey_attnames
20302030
*
20312031
* Get the primary key attnames for the given relation.
2032-
* Return NULL, and set numatts = 0, if no primary key exists.
2032+
* Return NULL, and set indnkeyatts = 0, if no primary key exists.
20332033
*/
20342034
static char **
2035-
get_pkey_attnames(Relation rel, int16 *numatts)
2035+
get_pkey_attnames(Relation rel, int16 *indnkeyatts)
20362036
{
20372037
Relation indexRelation;
20382038
ScanKeyData skey;
@@ -2042,8 +2042,8 @@ get_pkey_attnames(Relation rel, int16 *numatts)
20422042
char **result = NULL;
20432043
TupleDesc tupdesc;
20442044

2045-
/* initialize numatts to 0 in case no primary key exists */
2046-
*numatts = 0;
2045+
/* initialize indnkeyatts to 0 in case no primary key exists */
2046+
*indnkeyatts = 0;
20472047

20482048
tupdesc = rel->rd_att;
20492049

@@ -2064,12 +2064,12 @@ get_pkey_attnames(Relation rel, int16 *numatts)
20642064
/* we're only interested if it is the primary key */
20652065
if (index->indisprimary)
20662066
{
2067-
*numatts = index->indnatts;
2068-
if (*numatts > 0)
2067+
*indnkeyatts = index->indnkeyatts;
2068+
if (*indnkeyatts > 0)
20692069
{
2070-
result = (char **) palloc(*numatts * sizeof(char *));
2070+
result = (char **) palloc(*indnkeyatts * sizeof(char *));
20712071

2072-
for (i = 0; i < *numatts; i++)
2072+
for (i = 0; i < *indnkeyatts; i++)
20732073
result[i] = SPI_fname(tupdesc, index->indkey.values[i]);
20742074
}
20752075
break;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy