Skip to content

Commit 1663f33

Browse files
committed
Tweak btree page split logic so that when splitting a page that is
rightmost on its tree level, we split 2/3 to the left and 1/3 to the new right page, rather than the even split we use elsewhere. The idea is that when faced with a steadily increasing series of inserted keys (such as sequence or timestamp values), we'll end up with a btree that's about 2/3ds full not 1/2 full, which is much closer to the desired steady-state load for a btree. Per suggestion from Ann Harrison of IBPhoenix.
1 parent 1647d3a commit 1663f33

File tree

1 file changed

+31
-6
lines changed

1 file changed

+31
-6
lines changed

src/backend/access/nbtree/nbtinsert.c

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.85 2001/08/23 23:06:37 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.86 2001/09/29 23:49:51 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -24,7 +24,8 @@ typedef struct
2424
{
2525
/* context data for _bt_checksplitloc */
2626
Size newitemsz; /* size of new item to be inserted */
27-
bool non_leaf; /* T if splitting an internal node */
27+
bool is_leaf; /* T if splitting a leaf page */
28+
bool is_rightmost; /* T if splitting a rightmost page */
2829

2930
bool have_split; /* found a valid split? */
3031

@@ -940,6 +941,16 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
940941
* for it, we might find ourselves with too little room on the page that
941942
* it needs to go into!)
942943
*
944+
* If the page is the rightmost page on its level, we instead try to arrange
945+
* for twice as much free space on the right as on the left. In this way,
946+
* when we are inserting successively increasing keys (consider sequences,
947+
* timestamps, etc) we will end up with a tree whose pages are about 67% full,
948+
* instead of the 50% full result that we'd get without this special case.
949+
* (We could bias it even further to make the initially-loaded tree more full.
950+
* But since the steady-state load for a btree is about 70%, we'd likely just
951+
* be making more page-splitting work for ourselves later on, when we start
952+
* seeing updates to existing tuples.)
953+
*
943954
* We are passed the intended insert position of the new tuple, expressed as
944955
* the offsetnumber of the tuple it must go in front of. (This could be
945956
* maxoff+1 if the tuple is to go at the end.)
@@ -972,7 +983,8 @@ _bt_findsplitloc(Relation rel,
972983
/* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
973984
newitemsz += sizeof(ItemIdData);
974985
state.newitemsz = newitemsz;
975-
state.non_leaf = !P_ISLEAF(opaque);
986+
state.is_leaf = P_ISLEAF(opaque);
987+
state.is_rightmost = P_RIGHTMOST(opaque);
976988
state.have_split = false;
977989

978990
/* Total free space available on a btree page, after fixed overhead */
@@ -1076,7 +1088,6 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
10761088
int leftfree, int rightfree,
10771089
bool newitemonleft, Size firstrightitemsz)
10781090
{
1079-
10801091
/*
10811092
* Account for the new item on whichever side it is to be put.
10821093
*/
@@ -1089,7 +1100,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
10891100
* If we are not on the leaf level, we will be able to discard the key
10901101
* data from the first item that winds up on the right page.
10911102
*/
1092-
if (state->non_leaf)
1103+
if (! state->is_leaf)
10931104
rightfree += (int) firstrightitemsz -
10941105
(int) (MAXALIGN(sizeof(BTItemData)) + sizeof(ItemIdData));
10951106

@@ -1098,7 +1109,21 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
10981109
*/
10991110
if (leftfree >= 0 && rightfree >= 0)
11001111
{
1101-
int delta = leftfree - rightfree;
1112+
int delta;
1113+
1114+
if (state->is_rightmost)
1115+
{
1116+
/*
1117+
* On a rightmost page, try to equalize right free space with
1118+
* twice the left free space. See comments for _bt_findsplitloc.
1119+
*/
1120+
delta = (2 * leftfree) - rightfree;
1121+
}
1122+
else
1123+
{
1124+
/* Otherwise, aim for equal free space on both sides */
1125+
delta = leftfree - rightfree;
1126+
}
11021127

11031128
if (delta < 0)
11041129
delta = -delta;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy