Skip to content

Commit a760893

Browse files
Derive latestRemovedXid for btree deletes by reading heap pages. The
WAL record for btree delete contains a list of tids, even when backup blocks are present. We follow the tids to their heap tuples, taking care to follow LP_REDIRECT tuples. We ignore LP_DEAD tuples on the understanding that they will always have xmin/xmax earlier than any LP_NORMAL tuples referred to by killed index tuples. Iff all tuples are LP_DEAD we return InvalidTransactionId. The heap relfilenode is added to the WAL record, requiring API changes to pass down the heap Relation. XLOG_PAGE_MAGIC updated.
1 parent 59292f2 commit a760893

File tree

6 files changed

+254
-71
lines changed

6 files changed

+254
-71
lines changed

src/backend/access/nbtree/nbtinsert.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.177 2010/02/26 02:00:34 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.178 2010/03/28 09:27:01 sriggs Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -57,7 +57,8 @@ static void _bt_findinsertloc(Relation rel,
5757
OffsetNumber *offsetptr,
5858
int keysz,
5959
ScanKey scankey,
60-
IndexTuple newtup);
60+
IndexTuple newtup,
61+
Relation heapRel);
6162
static void _bt_insertonpg(Relation rel, Buffer buf,
6263
BTStack stack,
6364
IndexTuple itup,
@@ -78,7 +79,7 @@ static void _bt_pgaddtup(Relation rel, Page page,
7879
OffsetNumber itup_off, const char *where);
7980
static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
8081
int keysz, ScanKey scankey);
81-
static void _bt_vacuum_one_page(Relation rel, Buffer buffer);
82+
static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel);
8283

8384

8485
/*
@@ -175,7 +176,7 @@ _bt_doinsert(Relation rel, IndexTuple itup,
175176
if (checkUnique != UNIQUE_CHECK_EXISTING)
176177
{
177178
/* do the insertion */
178-
_bt_findinsertloc(rel, &buf, &offset, natts, itup_scankey, itup);
179+
_bt_findinsertloc(rel, &buf, &offset, natts, itup_scankey, itup, heapRel);
179180
_bt_insertonpg(rel, buf, stack, itup, offset, false);
180181
}
181182
else
@@ -491,7 +492,8 @@ _bt_findinsertloc(Relation rel,
491492
OffsetNumber *offsetptr,
492493
int keysz,
493494
ScanKey scankey,
494-
IndexTuple newtup)
495+
IndexTuple newtup,
496+
Relation heapRel)
495497
{
496498
Buffer buf = *bufptr;
497499
Page page = BufferGetPage(buf);
@@ -556,7 +558,7 @@ _bt_findinsertloc(Relation rel,
556558
*/
557559
if (P_ISLEAF(lpageop) && P_HAS_GARBAGE(lpageop))
558560
{
559-
_bt_vacuum_one_page(rel, buf);
561+
_bt_vacuum_one_page(rel, buf, heapRel);
560562

561563
/*
562564
* remember that we vacuumed this page, because that makes the
@@ -1998,7 +2000,7 @@ _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
19982000
* super-exclusive "cleanup" lock (see nbtree/README).
19992001
*/
20002002
static void
2001-
_bt_vacuum_one_page(Relation rel, Buffer buffer)
2003+
_bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel)
20022004
{
20032005
OffsetNumber deletable[MaxOffsetNumber];
20042006
int ndeletable = 0;
@@ -2025,7 +2027,7 @@ _bt_vacuum_one_page(Relation rel, Buffer buffer)
20252027
}
20262028

20272029
if (ndeletable > 0)
2028-
_bt_delitems(rel, buffer, deletable, ndeletable, false, 0);
2030+
_bt_delitems_delete(rel, buffer, deletable, ndeletable, heapRel);
20292031

20302032
/*
20312033
* Note: if we didn't find any LP_DEAD items, then the page's

src/backend/access/nbtree/nbtpage.c

Lines changed: 85 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.121 2010/03/19 10:41:21 sriggs Exp $
12+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.122 2010/03/28 09:27:01 sriggs Exp $
1313
*
1414
* NOTES
1515
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -719,15 +719,12 @@ _bt_page_recyclable(Page page)
719719
* ensure correct locking.
720720
*/
721721
void
722-
_bt_delitems(Relation rel, Buffer buf,
723-
OffsetNumber *itemnos, int nitems, bool isVacuum,
724-
BlockNumber lastBlockVacuumed)
722+
_bt_delitems_vacuum(Relation rel, Buffer buf,
723+
OffsetNumber *itemnos, int nitems, BlockNumber lastBlockVacuumed)
725724
{
726725
Page page = BufferGetPage(buf);
727726
BTPageOpaque opaque;
728727

729-
Assert(isVacuum || lastBlockVacuumed == 0);
730-
731728
/* No ereport(ERROR) until changes are logged */
732729
START_CRIT_SECTION();
733730

@@ -759,35 +756,14 @@ _bt_delitems(Relation rel, Buffer buf,
759756
XLogRecPtr recptr;
760757
XLogRecData rdata[2];
761758

762-
if (isVacuum)
763-
{
764-
xl_btree_vacuum xlrec_vacuum;
765-
766-
xlrec_vacuum.node = rel->rd_node;
767-
xlrec_vacuum.block = BufferGetBlockNumber(buf);
768-
769-
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
770-
rdata[0].data = (char *) &xlrec_vacuum;
771-
rdata[0].len = SizeOfBtreeVacuum;
772-
}
773-
else
774-
{
775-
xl_btree_delete xlrec_delete;
776-
777-
xlrec_delete.node = rel->rd_node;
778-
xlrec_delete.block = BufferGetBlockNumber(buf);
759+
xl_btree_vacuum xlrec_vacuum;
779760

780-
/*
781-
* XXX: We would like to set an accurate latestRemovedXid, but
782-
* there is no easy way of obtaining a useful value. So we punt
783-
* and store InvalidTransactionId, which forces the standby to
784-
* wait for/cancel all currently running transactions.
785-
*/
786-
xlrec_delete.latestRemovedXid = InvalidTransactionId;
787-
rdata[0].data = (char *) &xlrec_delete;
788-
rdata[0].len = SizeOfBtreeDelete;
789-
}
761+
xlrec_vacuum.node = rel->rd_node;
762+
xlrec_vacuum.block = BufferGetBlockNumber(buf);
790763

764+
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
765+
rdata[0].data = (char *) &xlrec_vacuum;
766+
rdata[0].len = SizeOfBtreeVacuum;
791767
rdata[0].buffer = InvalidBuffer;
792768
rdata[0].next = &(rdata[1]);
793769

@@ -810,10 +786,82 @@ _bt_delitems(Relation rel, Buffer buf,
810786
rdata[1].buffer_std = true;
811787
rdata[1].next = NULL;
812788

813-
if (isVacuum)
814-
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
815-
else
816-
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
789+
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
790+
791+
PageSetLSN(page, recptr);
792+
PageSetTLI(page, ThisTimeLineID);
793+
}
794+
795+
END_CRIT_SECTION();
796+
}
797+
798+
void
799+
_bt_delitems_delete(Relation rel, Buffer buf,
800+
OffsetNumber *itemnos, int nitems, Relation heapRel)
801+
{
802+
Page page = BufferGetPage(buf);
803+
BTPageOpaque opaque;
804+
805+
Assert(nitems > 0);
806+
807+
/* No ereport(ERROR) until changes are logged */
808+
START_CRIT_SECTION();
809+
810+
/* Fix the page */
811+
PageIndexMultiDelete(page, itemnos, nitems);
812+
813+
/*
814+
* We can clear the vacuum cycle ID since this page has certainly been
815+
* processed by the current vacuum scan.
816+
*/
817+
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
818+
opaque->btpo_cycleid = 0;
819+
820+
/*
821+
* Mark the page as not containing any LP_DEAD items. This is not
822+
* certainly true (there might be some that have recently been marked, but
823+
* weren't included in our target-item list), but it will almost always be
824+
* true and it doesn't seem worth an additional page scan to check it.
825+
* Remember that BTP_HAS_GARBAGE is only a hint anyway.
826+
*/
827+
opaque->btpo_flags &= ~BTP_HAS_GARBAGE;
828+
829+
MarkBufferDirty(buf);
830+
831+
/* XLOG stuff */
832+
if (!rel->rd_istemp)
833+
{
834+
XLogRecPtr recptr;
835+
XLogRecData rdata[3];
836+
837+
xl_btree_delete xlrec_delete;
838+
839+
xlrec_delete.node = rel->rd_node;
840+
xlrec_delete.hnode = heapRel->rd_node;
841+
xlrec_delete.block = BufferGetBlockNumber(buf);
842+
xlrec_delete.nitems = nitems;
843+
844+
rdata[0].data = (char *) &xlrec_delete;
845+
rdata[0].len = SizeOfBtreeDelete;
846+
rdata[0].buffer = InvalidBuffer;
847+
rdata[0].next = &(rdata[1]);
848+
849+
/*
850+
* We need the target-offsets array whether or not we store the whole
851+
* buffer, to allow us to find the latestRemovedXid on a standby server.
852+
*/
853+
rdata[1].data = (char *) itemnos;
854+
rdata[1].len = nitems * sizeof(OffsetNumber);
855+
rdata[1].buffer = InvalidBuffer;
856+
rdata[1].next = &(rdata[2]);
857+
858+
rdata[2].data = NULL;
859+
rdata[2].len = 0;
860+
rdata[2].buffer = buf;
861+
rdata[2].buffer_std = true;
862+
rdata[2].next = NULL;
863+
864+
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
817865

818866
PageSetLSN(page, recptr);
819867
PageSetTLI(page, ThisTimeLineID);

src/backend/access/nbtree/nbtree.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
1414
* IDENTIFICATION
15-
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.176 2010/02/26 02:00:34 momjian Exp $
15+
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.177 2010/03/28 09:27:01 sriggs Exp $
1616
*
1717
*-------------------------------------------------------------------------
1818
*/
@@ -708,7 +708,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
708708
buf = ReadBufferExtended(rel, MAIN_FORKNUM, num_pages - 1, RBM_NORMAL,
709709
info->strategy);
710710
LockBufferForCleanup(buf);
711-
_bt_delitems(rel, buf, NULL, 0, true, vstate.lastBlockVacuumed);
711+
_bt_delitems_vacuum(rel, buf, NULL, 0, vstate.lastBlockVacuumed);
712712
_bt_relbuf(rel, buf);
713713
}
714714

@@ -889,7 +889,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
889889
{
890890
BlockNumber lastBlockVacuumed = BufferGetBlockNumber(buf);
891891

892-
_bt_delitems(rel, buf, deletable, ndeletable, true, vstate->lastBlockVacuumed);
892+
_bt_delitems_vacuum(rel, buf, deletable, ndeletable, vstate->lastBlockVacuumed);
893893

894894
/*
895895
* Keep track of the block number of the lastBlockVacuumed, so we

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy