Skip to content

Commit 626a120

Browse files
committed
Further optimize GIN multi-key searches.
When skipping over some items in a posting tree, re-find the new location by descending the tree from root, rather than walking the right links. This can save a lot of I/O. Heavily modified from Alexander Korotkov's fast scan patch.
1 parent 8440897 commit 626a120

File tree

3 files changed

+98
-29
lines changed

3 files changed

+98
-29
lines changed

src/backend/access/gin/gindatapage.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,16 +1639,15 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
16391639
* Starts a new scan on a posting tree.
16401640
*/
16411641
GinBtreeStack *
1642-
ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno)
1642+
ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
16431643
{
1644-
GinBtreeData btree;
16451644
GinBtreeStack *stack;
16461645

1647-
ginPrepareDataScan(&btree, index, rootBlkno);
1646+
ginPrepareDataScan(btree, index, rootBlkno);
16481647

1649-
btree.fullScan = TRUE;
1648+
btree->fullScan = TRUE;
16501649

1651-
stack = ginFindLeafPage(&btree, TRUE);
1650+
stack = ginFindLeafPage(btree, TRUE);
16521651

16531652
return stack;
16541653
}

src/backend/access/gin/ginget.c

Lines changed: 92 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,13 @@ static void
9999
scanPostingTree(Relation index, GinScanEntry scanEntry,
100100
BlockNumber rootPostingTree)
101101
{
102+
GinBtreeData btree;
102103
GinBtreeStack *stack;
103104
Buffer buffer;
104105
Page page;
105106

106107
/* Descend to the leftmost leaf page */
107-
stack = ginScanBeginPostingTree(index, rootPostingTree);
108+
stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
108109
buffer = stack->buffer;
109110
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
110111

@@ -412,7 +413,8 @@ startScanEntry(GinState *ginstate, GinScanEntry entry)
412413
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
413414
needUnlock = FALSE;
414415

415-
stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree);
416+
stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
417+
rootPostingTree);
416418
entry->buffer = stack->buffer;
417419

418420
/*
@@ -506,8 +508,60 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
506508
{
507509
Page page;
508510
int i;
511+
bool stepright;
512+
513+
if (!BufferIsValid(entry->buffer))
514+
{
515+
entry->isFinished = true;
516+
return;
517+
}
518+
519+
/*
520+
* We have two strategies for finding the correct page: step right from
521+
* the current page, or descend the tree again from the root. If
522+
* advancePast equals the current item, the next matching item should be
523+
* on the next page, so we step right. Otherwise, descend from root.
524+
*/
525+
if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
526+
{
527+
stepright = true;
528+
LockBuffer(entry->buffer, GIN_SHARE);
529+
}
530+
else
531+
{
532+
GinBtreeStack *stack;
533+
534+
ReleaseBuffer(entry->buffer);
535+
536+
/*
537+
* Set the search key, and find the correct leaf page.
538+
*/
539+
if (ItemPointerIsLossyPage(&advancePast))
540+
{
541+
ItemPointerSet(&entry->btree.itemptr,
542+
GinItemPointerGetBlockNumber(&advancePast) + 1,
543+
FirstOffsetNumber);
544+
}
545+
else
546+
{
547+
entry->btree.itemptr = advancePast;
548+
entry->btree.itemptr.ip_posid++;
549+
}
550+
entry->btree.fullScan = false;
551+
stack = ginFindLeafPage(&entry->btree, true);
552+
553+
/* we don't need the stack, just the buffer. */
554+
entry->buffer = stack->buffer;
555+
IncrBufferRefCount(entry->buffer);
556+
freeGinBtreeStack(stack);
557+
stepright = false;
558+
}
559+
560+
elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
561+
GinItemPointerGetBlockNumber(&advancePast),
562+
GinItemPointerGetOffsetNumber(&advancePast),
563+
!stepright);
509564

510-
LockBuffer(entry->buffer, GIN_SHARE);
511565
page = BufferGetPage(entry->buffer);
512566
for (;;)
513567
{
@@ -519,30 +573,34 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
519573
entry->nlist = 0;
520574
}
521575

522-
/*
523-
* We've processed all the entries on this page. If it was the last
524-
* page in the tree, we're done.
525-
*/
526-
if (GinPageRightMost(page))
576+
if (stepright)
527577
{
528-
UnlockReleaseBuffer(entry->buffer);
529-
entry->buffer = InvalidBuffer;
530-
entry->isFinished = TRUE;
531-
return;
578+
/*
579+
* We've processed all the entries on this page. If it was the last
580+
* page in the tree, we're done.
581+
*/
582+
if (GinPageRightMost(page))
583+
{
584+
UnlockReleaseBuffer(entry->buffer);
585+
entry->buffer = InvalidBuffer;
586+
entry->isFinished = TRUE;
587+
return;
588+
}
589+
590+
/*
591+
* Step to next page, following the right link. then find the first
592+
* ItemPointer greater than advancePast.
593+
*/
594+
entry->buffer = ginStepRight(entry->buffer,
595+
ginstate->index,
596+
GIN_SHARE);
597+
page = BufferGetPage(entry->buffer);
532598
}
599+
stepright = true;
533600

534601
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
535602
continue; /* page was deleted by concurrent vacuum */
536603

537-
/*
538-
* Step to next page, following the right link. then find the first
539-
* ItemPointer greater than advancePast.
540-
*/
541-
entry->buffer = ginStepRight(entry->buffer,
542-
ginstate->index,
543-
GIN_SHARE);
544-
page = BufferGetPage(entry->buffer);
545-
546604
/*
547605
* The first item > advancePast might not be on this page, but
548606
* somewhere to the right, if the page was split, or a non-match from
@@ -566,8 +624,16 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
566624
{
567625
if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
568626
{
569-
LockBuffer(entry->buffer, GIN_UNLOCK);
570627
entry->offset = i;
628+
629+
if (GinPageRightMost(page))
630+
{
631+
/* after processing the copied items, we're done. */
632+
UnlockReleaseBuffer(entry->buffer);
633+
entry->buffer = InvalidBuffer;
634+
}
635+
else
636+
LockBuffer(entry->buffer, GIN_UNLOCK);
571637
return;
572638
}
573639
}
@@ -677,7 +743,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
677743
}
678744
else if (!BufferIsValid(entry->buffer))
679745
{
680-
/* A posting list from an entry tuple */
746+
/*
747+
* A posting list from an entry tuple, or the last page of a posting
748+
* tree.
749+
*/
681750
do
682751
{
683752
if (entry->offset >= entry->nlist)

src/include/access/gin_private.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,7 @@ extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
702702
extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
703703
ItemPointerData *items, uint32 nitem,
704704
GinStatsData *buildStats);
705-
extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
705+
extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno);
706706
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
707707
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
708708

@@ -802,6 +802,7 @@ typedef struct GinScanEntryData
802802
bool isFinished;
803803
bool reduceResult;
804804
uint32 predictNumberResult;
805+
GinBtreeData btree;
805806
} GinScanEntryData;
806807

807808
typedef struct GinScanOpaqueData

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy