Skip to content

Commit e5adcb7

Browse files
Refactor nbtree insertion scankeys.
Use dedicated struct to represent nbtree insertion scan keys. Having a dedicated struct makes the difference between search type scankeys and insertion scankeys a lot clearer, and simplifies the signature of several related functions. This is based on a suggestion by Andrey Lepikhov.

Streamline how unique index insertions cache binary search progress. Cache the state of in-progress binary searches within _bt_check_unique() for later instead of having callers avoid repeating the binary search in an ad-hoc manner. This makes it easy to add a new optimization: _bt_check_unique() now falls out of its loop immediately in the common case where it's already clear that there couldn't possibly be a duplicate.

The new _bt_check_unique() scheme makes it a lot easier to manage cached binary search effort afterwards, from within _bt_findinsertloc(). This is needed for the upcoming patch to make nbtree tuples unique by treating heap TID as a final tiebreaker column. Unique key binary searches need to restore lower and upper bounds. They cannot simply continue to use the >= lower bound as the offset to insert at, because the heap TID tiebreaker column must be used in comparisons for the restored binary search (unlike the original _bt_check_unique() binary search, where scankey's heap TID column must be omitted).

Author: Peter Geoghegan, Heikki Linnakangas
Reviewed-By: Heikki Linnakangas, Andrey Lepikhov
Discussion: https://postgr.es/m/CAH2-WzmE6AhUdk9NdWBf4K3HjWXZBX3+umC7mH7+WDrKcRtsOw@mail.gmail.com
1 parent 550b9d2 commit e5adcb7

File tree

9 files changed

+529
-387
lines changed

9 files changed

+529
-387
lines changed

contrib/amcheck/verify_nbtree.c

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
127127
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
128128
BtreeLevel level);
129129
static void bt_target_page_check(BtreeCheckState *state);
130-
static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
131-
static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
132-
ScanKey targetkey);
130+
static BTScanInsert bt_right_page_check_scankey(BtreeCheckState *state);
131+
static void bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey,
132+
BlockNumber childblock);
133133
static void bt_downlink_missing_check(BtreeCheckState *state);
134134
static void bt_tuple_present_callback(Relation index, HeapTuple htup,
135135
Datum *values, bool *isnull,
@@ -139,14 +139,14 @@ static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
139139
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
140140
OffsetNumber offset);
141141
static inline bool invariant_leq_offset(BtreeCheckState *state,
142-
ScanKey key,
142+
BTScanInsert key,
143143
OffsetNumber upperbound);
144144
static inline bool invariant_geq_offset(BtreeCheckState *state,
145-
ScanKey key,
145+
BTScanInsert key,
146146
OffsetNumber lowerbound);
147147
static inline bool invariant_leq_nontarget_offset(BtreeCheckState *state,
148-
Page other,
149-
ScanKey key,
148+
BTScanInsert key,
149+
Page nontarget,
150150
OffsetNumber upperbound);
151151
static Page palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum);
152152

@@ -838,8 +838,8 @@ bt_target_page_check(BtreeCheckState *state)
838838
{
839839
ItemId itemid;
840840
IndexTuple itup;
841-
ScanKey skey;
842841
size_t tupsize;
842+
BTScanInsert skey;
843843

844844
CHECK_FOR_INTERRUPTS();
845845

@@ -1030,7 +1030,7 @@ bt_target_page_check(BtreeCheckState *state)
10301030
*/
10311031
else if (offset == max)
10321032
{
1033-
ScanKey rightkey;
1033+
BTScanInsert rightkey;
10341034

10351035
/* Get item in next/right page */
10361036
rightkey = bt_right_page_check_scankey(state);
@@ -1082,7 +1082,7 @@ bt_target_page_check(BtreeCheckState *state)
10821082
{
10831083
BlockNumber childblock = BTreeInnerTupleGetDownLink(itup);
10841084

1085-
bt_downlink_check(state, childblock, skey);
1085+
bt_downlink_check(state, skey, childblock);
10861086
}
10871087
}
10881088

@@ -1111,11 +1111,12 @@ bt_target_page_check(BtreeCheckState *state)
11111111
* Note that !readonly callers must reverify that target page has not
11121112
* been concurrently deleted.
11131113
*/
1114-
static ScanKey
1114+
static BTScanInsert
11151115
bt_right_page_check_scankey(BtreeCheckState *state)
11161116
{
11171117
BTPageOpaque opaque;
11181118
ItemId rightitem;
1119+
IndexTuple firstitup;
11191120
BlockNumber targetnext;
11201121
Page rightpage;
11211122
OffsetNumber nline;
@@ -1303,8 +1304,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
13031304
* Return first real item scankey. Note that this relies on right page
13041305
* memory remaining allocated.
13051306
*/
1306-
return _bt_mkscankey(state->rel,
1307-
(IndexTuple) PageGetItem(rightpage, rightitem));
1307+
firstitup = (IndexTuple) PageGetItem(rightpage, rightitem);
1308+
return _bt_mkscankey(state->rel, firstitup);
13081309
}
13091310

13101311
/*
@@ -1317,8 +1318,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
13171318
* verification this way around is much more practical.
13181319
*/
13191320
static void
1320-
bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
1321-
ScanKey targetkey)
1321+
bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey,
1322+
BlockNumber childblock)
13221323
{
13231324
OffsetNumber offset;
13241325
OffsetNumber maxoffset;
@@ -1423,8 +1424,7 @@ bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
14231424
if (offset_is_negative_infinity(copaque, offset))
14241425
continue;
14251426

1426-
if (!invariant_leq_nontarget_offset(state, child,
1427-
targetkey, offset))
1427+
if (!invariant_leq_nontarget_offset(state, targetkey, child, offset))
14281428
ereport(ERROR,
14291429
(errcode(ERRCODE_INDEX_CORRUPTED),
14301430
errmsg("down-link lower bound invariant violated for index \"%s\"",
@@ -1864,13 +1864,12 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset)
18641864
* to corruption.
18651865
*/
18661866
static inline bool
1867-
invariant_leq_offset(BtreeCheckState *state, ScanKey key,
1867+
invariant_leq_offset(BtreeCheckState *state, BTScanInsert key,
18681868
OffsetNumber upperbound)
18691869
{
1870-
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
18711870
int32 cmp;
18721871

1873-
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, upperbound);
1872+
cmp = _bt_compare(state->rel, key, state->target, upperbound);
18741873

18751874
return cmp <= 0;
18761875
}
@@ -1883,13 +1882,12 @@ invariant_leq_offset(BtreeCheckState *state, ScanKey key,
18831882
* to corruption.
18841883
*/
18851884
static inline bool
1886-
invariant_geq_offset(BtreeCheckState *state, ScanKey key,
1885+
invariant_geq_offset(BtreeCheckState *state, BTScanInsert key,
18871886
OffsetNumber lowerbound)
18881887
{
1889-
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
18901888
int32 cmp;
18911889

1892-
cmp = _bt_compare(state->rel, nkeyatts, key, state->target, lowerbound);
1890+
cmp = _bt_compare(state->rel, key, state->target, lowerbound);
18931891

18941892
return cmp >= 0;
18951893
}
@@ -1905,14 +1903,12 @@ invariant_geq_offset(BtreeCheckState *state, ScanKey key,
19051903
* to corruption.
19061904
*/
19071905
static inline bool
1908-
invariant_leq_nontarget_offset(BtreeCheckState *state,
1909-
Page nontarget, ScanKey key,
1910-
OffsetNumber upperbound)
1906+
invariant_leq_nontarget_offset(BtreeCheckState *state, BTScanInsert key,
1907+
Page nontarget, OffsetNumber upperbound)
19111908
{
1912-
int16 nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
19131909
int32 cmp;
19141910

1915-
cmp = _bt_compare(state->rel, nkeyatts, key, nontarget, upperbound);
1911+
cmp = _bt_compare(state->rel, key, nontarget, upperbound);
19161912

19171913
return cmp <= 0;
19181914
}

src/backend/access/nbtree/README

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -598,19 +598,22 @@ scankey point to comparison functions that return boolean, such as int4lt.
598598
There might be more than one scankey entry for a given index column, or
599599
none at all. (We require the keys to appear in index column order, but
600600
the order of multiple keys for a given column is unspecified.) An
601-
insertion scankey uses the same array-of-ScanKey data structure, but the
602-
sk_func pointers point to btree comparison support functions (ie, 3-way
603-
comparators that return int4 values interpreted as <0, =0, >0). In an
604-
insertion scankey there is exactly one entry per index column. Insertion
605-
scankeys are built within the btree code (eg, by _bt_mkscankey()) and are
606-
used to locate the starting point of a scan, as well as for locating the
607-
place to insert a new index tuple. (Note: in the case of an insertion
608-
scankey built from a search scankey, there might be fewer keys than
609-
index columns, indicating that we have no constraints for the remaining
610-
index columns.) After we have located the starting point of a scan, the
611-
original search scankey is consulted as each index entry is sequentially
612-
scanned to decide whether to return the entry and whether the scan can
613-
stop (see _bt_checkkeys()).
601+
insertion scankey ("BTScanInsert" data structure) uses a similar
602+
array-of-ScanKey data structure, but the sk_func pointers point to btree
603+
comparison support functions (ie, 3-way comparators that return int4 values
604+
interpreted as <0, =0, >0). In an insertion scankey there is at most one
605+
entry per index column. There is also other data about the rules used to
606+
locate where to begin the scan, such as whether or not the scan is a
607+
"nextkey" scan. Insertion scankeys are built within the btree code (eg, by
608+
_bt_mkscankey()) and are used to locate the starting point of a scan, as
609+
well as for locating the place to insert a new index tuple. (Note: in the
610+
case of an insertion scankey built from a search scankey or built from a
611+
truncated pivot tuple, there might be fewer keys than index columns,
612+
indicating that we have no constraints for the remaining index columns.)
613+
After we have located the starting point of a scan, the original search
614+
scankey is consulted as each index entry is sequentially scanned to decide
615+
whether to return the entry and whether the scan can stop (see
616+
_bt_checkkeys()).
614617

615618
We use the term "pivot" index tuples to distinguish tuples which don't point
616619
to heap tuples, but rather are used for tree navigation. Pivot tuples include

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy