Skip to content

Commit e04509f

Browse files
committed
amcheck: Distinguish interrupted page deletion from corruption.
This prevents false-positive reports of "the first child of leftmost target page is not leftmost of its level", "block %u is not leftmost" and "left link/right link pair". These reports appeared when amcheck ran before VACUUM had cleaned up the half-dead pages left behind after a cluster exited recovery between the first-stage and second-stage WAL records of a page deletion. Back-patch to v11 (all supported versions).

Reviewed by Peter Geoghegan.

Discussion: https://postgr.es/m/20231005025232.c7.nmisch@google.com
1 parent c804ffb commit e04509f

File tree

1 file changed

+72
-4
lines changed

1 file changed

+72
-4
lines changed

contrib/amcheck/verify_nbtree.c

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
127127
bool readonly, bool heapallindexed);
128128
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
129129
BtreeLevel level);
130+
static bool bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
131+
BlockNumber start,
132+
BTPageOpaque start_opaque);
130133
static void bt_target_page_check(BtreeCheckState *state);
131134
static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
132135
static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
@@ -716,7 +719,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
716719
*/
717720
if (state->readonly)
718721
{
719-
if (!P_LEFTMOST(opaque))
722+
if (!bt_leftmost_ignoring_half_dead(state, current, opaque))
720723
ereport(ERROR,
721724
(errcode(ERRCODE_INDEX_CORRUPTED),
722725
errmsg("block %u is not leftmost in index \"%s\"",
@@ -769,10 +772,14 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
769772
}
770773

771774
/*
772-
* readonly mode can only ever land on live pages and half-dead pages,
773-
* so sibling pointers should always be in mutual agreement
775+
* Sibling links should be in mutual agreement. There arises
776+
* leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
777+
* of the parent's low-key downlink is half-dead. (A half-dead page
778+
* has no downlink from its parent.) Under heavyweight locking, the
779+
* last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
774780
*/
775-
if (state->readonly && opaque->btpo_prev != leftcurrent)
781+
if (state->readonly &&
782+
opaque->btpo_prev != leftcurrent && leftcurrent != P_NONE)
776783
ereport(ERROR,
777784
(errcode(ERRCODE_INDEX_CORRUPTED),
778785
errmsg("left link/right link pair in index \"%s\" not in agreement",
@@ -822,6 +829,67 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
822829
return nextleveldown;
823830
}
824831

832+
/*
833+
* Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
834+
* half-dead, sibling-linked pages to the left. If a half-dead page appears
835+
* under state->readonly, the database exited recovery between the first-stage
836+
* and second-stage WAL records of a deletion.
837+
*/
838+
static bool
839+
bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
840+
BlockNumber start,
841+
BTPageOpaque start_opaque)
842+
{
843+
BlockNumber reached = start_opaque->btpo_prev,
844+
reached_from = start;
845+
bool all_half_dead = true;
846+
847+
/*
848+
* To handle the !readonly case, we'd need to accept BTP_DELETED pages and
849+
* potentially observe nbtree/README "Page deletion and backwards scans".
850+
*/
851+
Assert(state->readonly);
852+
853+
while (reached != P_NONE && all_half_dead)
854+
{
855+
Page page = palloc_btree_page(state, reached);
856+
BTPageOpaque reached_opaque = (BTPageOpaque) PageGetSpecialPointer(page);
857+
858+
CHECK_FOR_INTERRUPTS();
859+
860+
/*
861+
* Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
862+
* writes that side-links will continue to point to the siblings.
863+
* Check btpo_next for that property.
864+
*/
865+
all_half_dead = P_ISHALFDEAD(reached_opaque) &&
866+
reached != start &&
867+
reached != reached_from &&
868+
reached_opaque->btpo_next == reached_from;
869+
if (all_half_dead)
870+
{
871+
XLogRecPtr pagelsn = PageGetLSN(page);
872+
873+
/* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
874+
ereport(DEBUG1,
875+
(errcode(ERRCODE_NO_DATA),
876+
errmsg_internal("harmless interrupted page deletion detected in index \"%s\"",
877+
RelationGetRelationName(state->rel)),
878+
errdetail_internal("Block=%u right block=%u page lsn=%X/%X.",
879+
reached, reached_from,
880+
(uint32) (pagelsn >> 32),
881+
(uint32) pagelsn)));
882+
883+
reached_from = reached;
884+
reached = reached_opaque->btpo_prev;
885+
}
886+
887+
pfree(page);
888+
}
889+
890+
return all_half_dead;
891+
}
892+
825893
/*
826894
* Function performs the following checks on target page, or pages ancillary to
827895
* target page:

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy