Skip to content

Commit 94e0333

Browse files
committed
Create a routine PageIndexMultiDelete() that replaces a loop around
PageIndexTupleDelete() with a single pass of compactification --- logic mostly lifted from PageRepairFragmentation. I noticed while profiling that a VACUUM that's cleaning up a whole lot of deleted tuples would spend as much as a third of its CPU time in PageIndexTupleDelete; not too surprising considering the loop method was roughly O(N^2) in the number of tuples involved.
1 parent 775d283 commit 94e0333

File tree

4 files changed

+144
-19
lines changed

4 files changed

+144
-19
lines changed

src/backend/access/nbtree/nbtpage.c

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.81 2004/12/31 21:59:22 pgsql Exp $
12+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.82 2005/03/22 06:17:03 tgl Exp $
1313
*
1414
* NOTES
1515
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -639,17 +639,12 @@ _bt_delitems(Relation rel, Buffer buf,
639639
OffsetNumber *itemnos, int nitems)
640640
{
641641
Page page = BufferGetPage(buf);
642-
int i;
643642

644643
/* No ereport(ERROR) until changes are logged */
645644
START_CRIT_SECTION();
646645

647-
/*
648-
* Delete the items in reverse order so we don't have to think about
649-
* adjusting item numbers for previous deletions.
650-
*/
651-
for (i = nitems - 1; i >= 0; i--)
652-
PageIndexTupleDelete(page, itemnos[i]);
646+
/* Fix the page */
647+
PageIndexMultiDelete(page, itemnos, nitems);
653648

654649
/* XLOG stuff */
655650
if (!rel->rd_istemp)

src/backend/access/nbtree/nbtxlog.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.19 2004/12/31 21:59:22 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -411,12 +411,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
411411
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
412412
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
413413

414-
/* be careful to delete from back to front */
415-
while (unused < unend)
416-
{
417-
unend--;
418-
PageIndexTupleDelete(page, *unend);
419-
}
414+
PageIndexMultiDelete(page, unused, unend - unused);
420415
}
421416

422417
PageSetLSN(page, lsn);

src/backend/storage/page/bufpage.c

Lines changed: 137 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.62 2004/12/31 22:01:10 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -274,13 +274,14 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
274274
}
275275

276276
/*
277-
* sorting support for PageRepairFragmentation
277+
* sorting support for PageRepairFragmentation and PageIndexMultiDelete
278278
*/
279279
typedef struct itemIdSortData
280280
{
281281
int offsetindex; /* linp array index */
282282
int itemoff; /* page offset of item data */
283283
Size alignedlen; /* MAXALIGN(item data len) */
284+
ItemIdData olditemid; /* used only in PageIndexMultiDelete */
284285
} itemIdSortData;
285286
typedef itemIdSortData *itemIdSort;
286287

@@ -297,7 +298,8 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
297298
*
298299
* Frees fragmented space on a page.
299300
* It doesn't remove unused line pointers! Please don't change this.
300-
* This routine is usable for heap pages only.
301+
*
302+
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
301303
*
302304
* Returns number of unused line pointers on page. If "unused" is not NULL
303305
* then the unused[] array is filled with indexes of unused line pointers.
@@ -543,3 +545,135 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
543545
}
544546
}
545547
}
548+
549+
550+
/*
551+
* PageIndexMultiDelete
552+
*
553+
* This routine handles the case of deleting multiple tuples from an
554+
* index page at once. It is considerably faster than a loop around
555+
* PageIndexTupleDelete ... however, the caller *must* supply the array
556+
* of item numbers to be deleted in item number order!
557+
*/
558+
void
559+
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
560+
{
561+
PageHeader phdr = (PageHeader) page;
562+
Offset pd_lower = phdr->pd_lower;
563+
Offset pd_upper = phdr->pd_upper;
564+
Offset pd_special = phdr->pd_special;
565+
itemIdSort itemidbase,
566+
itemidptr;
567+
ItemId lp;
568+
int nline,
569+
nused;
570+
int i;
571+
Size totallen;
572+
Offset upper;
573+
Size size;
574+
unsigned offset;
575+
int nextitm;
576+
OffsetNumber offnum;
577+
578+
/*
579+
* If there aren't very many items to delete, then retail
580+
* PageIndexTupleDelete is the best way. Delete the items in reverse
581+
* order so we don't have to think about adjusting item numbers for
582+
* previous deletions.
583+
*
584+
* TODO: tune the magic number here
585+
*/
586+
if (nitems <= 2)
587+
{
588+
while (--nitems >= 0)
589+
PageIndexTupleDelete(page, itemnos[nitems]);
590+
return;
591+
}
592+
593+
/*
594+
* As with PageRepairFragmentation, paranoia seems justified.
595+
*/
596+
if (pd_lower < SizeOfPageHeaderData ||
597+
pd_lower > pd_upper ||
598+
pd_upper > pd_special ||
599+
pd_special > BLCKSZ ||
600+
pd_special != MAXALIGN(pd_special))
601+
ereport(ERROR,
602+
(errcode(ERRCODE_DATA_CORRUPTED),
603+
errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
604+
pd_lower, pd_upper, pd_special)));
605+
606+
/*
607+
* Scan the item pointer array and build a list of just the ones we
608+
* are going to keep. Notice we do not modify the page yet, since
609+
* we are still validity-checking.
610+
*/
611+
nline = PageGetMaxOffsetNumber(page);
612+
itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nline);
613+
itemidptr = itemidbase;
614+
totallen = 0;
615+
nused = 0;
616+
nextitm = 0;
617+
for (offnum = 1; offnum <= nline; offnum++)
618+
{
619+
lp = PageGetItemId(page, offnum);
620+
size = ItemIdGetLength(lp);
621+
offset = ItemIdGetOffset(lp);
622+
if (offset < pd_upper ||
623+
(offset + size) > pd_special ||
624+
offset != MAXALIGN(offset))
625+
ereport(ERROR,
626+
(errcode(ERRCODE_DATA_CORRUPTED),
627+
errmsg("corrupted item pointer: offset = %u, size = %u",
628+
offset, (unsigned int) size)));
629+
630+
if (nextitm < nitems && offnum == itemnos[nextitm])
631+
{
632+
/* skip item to be deleted */
633+
nextitm++;
634+
}
635+
else
636+
{
637+
itemidptr->offsetindex = nused; /* where it will go */
638+
itemidptr->itemoff = offset;
639+
itemidptr->olditemid = *lp;
640+
itemidptr->alignedlen = MAXALIGN(size);
641+
totallen += itemidptr->alignedlen;
642+
itemidptr++;
643+
nused++;
644+
}
645+
}
646+
647+
/* this will catch invalid or out-of-order itemnos[] */
648+
if (nextitm != nitems)
649+
elog(ERROR, "incorrect index offsets supplied");
650+
651+
if (totallen > (Size) (pd_special - pd_lower))
652+
ereport(ERROR,
653+
(errcode(ERRCODE_DATA_CORRUPTED),
654+
errmsg("corrupted item lengths: total %u, available space %u",
655+
(unsigned int) totallen, pd_special - pd_lower)));
656+
657+
/* sort itemIdSortData array into decreasing itemoff order */
658+
qsort((char *) itemidbase, nused, sizeof(itemIdSortData),
659+
itemoffcompare);
660+
661+
/* compactify page and install new itemids */
662+
upper = pd_special;
663+
664+
for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++)
665+
{
666+
lp = PageGetItemId(page, itemidptr->offsetindex + 1);
667+
upper -= itemidptr->alignedlen;
668+
memmove((char *) page + upper,
669+
(char *) page + itemidptr->itemoff,
670+
itemidptr->alignedlen);
671+
*lp = itemidptr->olditemid;
672+
lp->lp_off = upper;
673+
}
674+
675+
phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
676+
phdr->pd_upper = upper;
677+
678+
pfree(itemidbase);
679+
}

src/include/storage/bufpage.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.63 2004/12/31 22:03:42 pgsql Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.64 2005/03/22 06:17:03 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -323,5 +323,6 @@ extern void PageRestoreTempPage(Page tempPage, Page oldPage);
323323
extern int PageRepairFragmentation(Page page, OffsetNumber *unused);
324324
extern Size PageGetFreeSpace(Page page);
325325
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
326+
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
326327

327328
#endif /* BUFPAGE_H */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy