Skip to content

Commit bbc1376

Browse files
committed
Teach verify_heapam() to validate update chains within a page.
Prior to this commit, we only considered each tuple or line pointer on the page in isolation, but now we can do some validation of a line pointer against its successor. For example, a redirect line pointer shouldn't point to another redirect line pointer, and if a tuple is HOT-updated, the result should be a heap-only tuple. Himanshu Upadhyaya and Robert Haas, reviewed by Aleksander Alekseev, Andres Freund, and Peter Geoghegan.
1 parent d69c404 commit bbc1376

File tree

2 files changed

+524
-17
lines changed

2 files changed

+524
-17
lines changed

contrib/amcheck/verify_heapam.c

Lines changed: 285 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,9 @@ typedef struct HeapCheckContext
150150
} HeapCheckContext;
151151

152152
/* Internal implementation */
153-
static void check_tuple(HeapCheckContext *ctx);
153+
static void check_tuple(HeapCheckContext *ctx,
154+
bool *xmin_commit_status_ok,
155+
XidCommitStatus *xmin_commit_status);
154156
static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
155157
ToastedAttribute *ta, int32 *expected_chunk_seq,
156158
uint32 extsize);
@@ -160,7 +162,9 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
160162
ToastedAttribute *ta);
161163

162164
static bool check_tuple_header(HeapCheckContext *ctx);
163-
static bool check_tuple_visibility(HeapCheckContext *ctx);
165+
static bool check_tuple_visibility(HeapCheckContext *ctx,
166+
bool *xmin_commit_status_ok,
167+
XidCommitStatus *xmin_commit_status);
164168

165169
static void report_corruption(HeapCheckContext *ctx, char *msg);
166170
static void report_toast_corruption(HeapCheckContext *ctx,
@@ -399,9 +403,16 @@ verify_heapam(PG_FUNCTION_ARGS)
399403
for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
400404
{
401405
OffsetNumber maxoff;
406+
OffsetNumber predecessor[MaxOffsetNumber];
407+
OffsetNumber successor[MaxOffsetNumber];
408+
bool lp_valid[MaxOffsetNumber];
409+
bool xmin_commit_status_ok[MaxOffsetNumber];
410+
XidCommitStatus xmin_commit_status[MaxOffsetNumber];
402411

403412
CHECK_FOR_INTERRUPTS();
404413

414+
memset(predecessor, 0, sizeof(OffsetNumber) * MaxOffsetNumber);
415+
405416
/* Optionally skip over all-frozen or all-visible blocks */
406417
if (skip_option != SKIP_PAGES_NONE)
407418
{
@@ -433,6 +444,12 @@ verify_heapam(PG_FUNCTION_ARGS)
433444
for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
434445
ctx.offnum = OffsetNumberNext(ctx.offnum))
435446
{
447+
BlockNumber nextblkno;
448+
OffsetNumber nextoffnum;
449+
450+
successor[ctx.offnum] = InvalidOffsetNumber;
451+
lp_valid[ctx.offnum] = false;
452+
xmin_commit_status_ok[ctx.offnum] = false;
436453
ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
437454

438455
/* Skip over unused/dead line pointers */
@@ -469,6 +486,14 @@ verify_heapam(PG_FUNCTION_ARGS)
469486
report_corruption(&ctx,
470487
psprintf("line pointer redirection to unused item at offset %u",
471488
(unsigned) rdoffnum));
489+
490+
/*
491+
* Record the fact that this line pointer has passed basic
492+
* sanity checking, and also the offset number to which it
493+
* points.
494+
*/
495+
lp_valid[ctx.offnum] = true;
496+
successor[ctx.offnum] = rdoffnum;
472497
continue;
473498
}
474499

@@ -502,11 +527,237 @@ verify_heapam(PG_FUNCTION_ARGS)
502527
}
503528

504529
/* It should be safe to examine the tuple's header, at least */
530+
lp_valid[ctx.offnum] = true;
505531
ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
506532
ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
507533

508534
/* Ok, ready to check this next tuple */
509-
check_tuple(&ctx);
535+
check_tuple(&ctx,
536+
&xmin_commit_status_ok[ctx.offnum],
537+
&xmin_commit_status[ctx.offnum]);
538+
539+
/*
540+
* If the CTID field of this tuple seems to point to another tuple
541+
* on the same page, record that tuple as the successor of this
542+
* one.
543+
*/
544+
nextblkno = ItemPointerGetBlockNumber(&(ctx.tuphdr)->t_ctid);
545+
nextoffnum = ItemPointerGetOffsetNumber(&(ctx.tuphdr)->t_ctid);
546+
if (nextblkno == ctx.blkno && nextoffnum != ctx.offnum)
547+
successor[ctx.offnum] = nextoffnum;
548+
}
549+
550+
/*
551+
* Update chain validation. Check each line pointer that's got a valid
552+
* successor against that successor.
553+
*/
554+
ctx.attnum = -1;
555+
for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
556+
ctx.offnum = OffsetNumberNext(ctx.offnum))
557+
{
558+
ItemId curr_lp;
559+
ItemId next_lp;
560+
HeapTupleHeader curr_htup;
561+
HeapTupleHeader next_htup;
562+
TransactionId curr_xmin;
563+
TransactionId curr_xmax;
564+
TransactionId next_xmin;
565+
OffsetNumber nextoffnum = successor[ctx.offnum];
566+
567+
/*
568+
* The current line pointer may not have a successor, either
569+
* because it's not valid or because it didn't point to anything.
570+
* In either case, we have to give up.
571+
*
572+
* If the current line pointer does point to something, it's
573+
* possible that the target line pointer isn't valid. We have to
574+
* give up in that case, too.
575+
*/
576+
if (nextoffnum == InvalidOffsetNumber || !lp_valid[nextoffnum])
577+
continue;
578+
579+
/* We have two valid line pointers that we can examine. */
580+
curr_lp = PageGetItemId(ctx.page, ctx.offnum);
581+
next_lp = PageGetItemId(ctx.page, nextoffnum);
582+
583+
/* Handle the cases where the current line pointer is a redirect. */
584+
if (ItemIdIsRedirected(curr_lp))
585+
{
586+
/* Can't redirect to another redirect. */
587+
if (ItemIdIsRedirected(next_lp))
588+
{
589+
report_corruption(&ctx,
590+
psprintf("redirected line pointer points to another redirected line pointer at offset %u",
591+
(unsigned) nextoffnum));
592+
continue;
593+
}
594+
595+
/* Can only redirect to a HOT tuple. */
596+
next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
597+
if (!HeapTupleHeaderIsHeapOnly(next_htup))
598+
{
599+
report_corruption(&ctx,
600+
psprintf("redirected line pointer points to a non-heap-only tuple at offset %u",
601+
(unsigned) nextoffnum));
602+
}
603+
604+
/*
605+
* Redirects are created by updates, so successor should be
606+
* the result of an update.
607+
*/
608+
if ((next_htup->t_infomask & HEAP_UPDATED) == 0)
609+
{
610+
report_corruption(&ctx,
611+
psprintf("redirected line pointer points to a non-heap-updated tuple at offset %u",
612+
(unsigned) nextoffnum));
613+
}
614+
615+
/* HOT chains should not intersect. */
616+
if (predecessor[nextoffnum] != InvalidOffsetNumber)
617+
{
618+
report_corruption(&ctx,
619+
psprintf("redirect line pointer points to offset %u, but offset %u also points there",
620+
(unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
621+
continue;
622+
}
623+
624+
/*
625+
* This redirect and the tuple to which it points seem to be
626+
* part of an update chain.
627+
*/
628+
predecessor[nextoffnum] = ctx.offnum;
629+
continue;
630+
}
631+
632+
/*
633+
* If the next line pointer is a redirect, or if it's a tuple
634+
* but the XMAX of this tuple doesn't match the XMIN of the next
635+
* tuple, then the two aren't part of the same update chain and
636+
* there is nothing more to do.
637+
*/
638+
if (ItemIdIsRedirected(next_lp))
639+
continue;
640+
curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp);
641+
curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup);
642+
next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
643+
next_xmin = HeapTupleHeaderGetXmin(next_htup);
644+
if (!TransactionIdIsValid(curr_xmax) ||
645+
!TransactionIdEquals(curr_xmax, next_xmin))
646+
continue;
647+
648+
/* HOT chains should not intersect. */
649+
if (predecessor[nextoffnum] != InvalidOffsetNumber)
650+
{
651+
report_corruption(&ctx,
652+
psprintf("tuple points to new version at offset %u, but offset %u also points there",
653+
(unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
654+
continue;
655+
}
656+
657+
/*
658+
* This tuple and the tuple to which it points seem to be part
659+
* of an update chain.
660+
*/
661+
predecessor[nextoffnum] = ctx.offnum;
662+
663+
/*
664+
* If the current tuple is marked as HOT-updated, then the next
665+
* tuple should be marked as a heap-only tuple. Conversely, if the
666+
* current tuple isn't marked as HOT-updated, then the next tuple
667+
* shouldn't be marked as a heap-only tuple.
668+
*/
669+
if (!HeapTupleHeaderIsHotUpdated(curr_htup) &&
670+
HeapTupleHeaderIsHeapOnly(next_htup))
671+
{
672+
report_corruption(&ctx,
673+
psprintf("non-heap-only update produced a heap-only tuple at offset %u",
674+
(unsigned) nextoffnum));
675+
}
676+
if (HeapTupleHeaderIsHotUpdated(curr_htup) &&
677+
!HeapTupleHeaderIsHeapOnly(next_htup))
678+
{
679+
report_corruption(&ctx,
680+
psprintf("heap-only update produced a non-heap only tuple at offset %u",
681+
(unsigned) nextoffnum));
682+
}
683+
684+
/*
685+
* If the current tuple's xmin is still in progress but the
686+
* successor tuple's xmin is committed, that's corruption.
687+
*
688+
* NB: We recheck the commit status of the current tuple's xmin
689+
* here, because it might have committed after we checked it and
690+
* before we checked the commit status of the successor tuple's
691+
* xmin. This should be safe because the xmin itself can't have
692+
* changed, only its commit status.
693+
*/
694+
curr_xmin = HeapTupleHeaderGetXmin(curr_htup);
695+
if (xmin_commit_status_ok[ctx.offnum] &&
696+
xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS &&
697+
xmin_commit_status_ok[nextoffnum] &&
698+
xmin_commit_status[nextoffnum] == XID_COMMITTED &&
699+
TransactionIdIsInProgress(curr_xmin))
700+
{
701+
report_corruption(&ctx,
702+
psprintf("tuple with in-progress xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
703+
(unsigned) curr_xmin,
704+
(unsigned) ctx.offnum,
705+
(unsigned) next_xmin));
706+
}
707+
708+
/*
709+
* If the current tuple's xmin is aborted but the successor tuple's
710+
* xmin is in-progress or committed, that's corruption.
711+
*/
712+
if (xmin_commit_status_ok[ctx.offnum] &&
713+
xmin_commit_status[ctx.offnum] == XID_ABORTED &&
714+
xmin_commit_status_ok[nextoffnum])
715+
{
716+
if (xmin_commit_status[nextoffnum] == XID_IN_PROGRESS)
717+
report_corruption(&ctx,
718+
psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with in-progress xmin %u",
719+
(unsigned) curr_xmin,
720+
(unsigned) ctx.offnum,
721+
(unsigned) next_xmin));
722+
else if (xmin_commit_status[nextoffnum] == XID_COMMITTED)
723+
report_corruption(&ctx,
724+
psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
725+
(unsigned) curr_xmin,
726+
(unsigned) ctx.offnum,
727+
(unsigned) next_xmin));
728+
}
729+
}
730+
731+
/*
732+
* An update chain can start either with a non-heap-only tuple or with
733+
* a redirect line pointer, but not with a heap-only tuple.
734+
*
735+
* (This check is in a separate loop because we need the predecessor
736+
* array to be fully populated before we can perform it.)
737+
*/
738+
for (ctx.offnum = FirstOffsetNumber;
739+
ctx.offnum <= maxoff;
740+
ctx.offnum = OffsetNumberNext(ctx.offnum))
741+
{
742+
if (xmin_commit_status_ok[ctx.offnum] &&
743+
(xmin_commit_status[ctx.offnum] == XID_COMMITTED ||
744+
xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS) &&
745+
predecessor[ctx.offnum] == InvalidOffsetNumber)
746+
{
747+
ItemId curr_lp;
748+
749+
curr_lp = PageGetItemId(ctx.page, ctx.offnum);
750+
if (!ItemIdIsRedirected(curr_lp))
751+
{
752+
HeapTupleHeader curr_htup;
753+
754+
curr_htup = (HeapTupleHeader)
755+
PageGetItem(ctx.page, curr_lp);
756+
if (HeapTupleHeaderIsHeapOnly(curr_htup))
757+
report_corruption(&ctx,
758+
psprintf("tuple is root of chain but is marked as heap-only tuple"));
759+
}
760+
}
510761
}
511762

512763
/* clean up */
@@ -638,6 +889,7 @@ check_tuple_header(HeapCheckContext *ctx)
638889
{
639890
HeapTupleHeader tuphdr = ctx->tuphdr;
640891
uint16 infomask = tuphdr->t_infomask;
892+
TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr);
641893
bool result = true;
642894
unsigned expected_hoff;
643895

@@ -663,6 +915,19 @@ check_tuple_header(HeapCheckContext *ctx)
663915
*/
664916
}
665917

918+
if (!TransactionIdIsValid(curr_xmax) &&
919+
HeapTupleHeaderIsHotUpdated(tuphdr))
920+
{
921+
report_corruption(ctx,
922+
psprintf("tuple has been HOT updated, but xmax is 0"));
923+
924+
/*
925+
* As above, even though this shouldn't happen, it's not sufficient
926+
* justification for skipping further checks; we should still be able
927+
* to perform sensibly.
928+
*/
929+
}
930+
666931
if (infomask & HEAP_HASNULL)
667932
expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
668933
else
@@ -718,9 +983,14 @@ check_tuple_header(HeapCheckContext *ctx)
718983
* Returns true if the tuple itself should be checked, false otherwise. Sets
719984
* ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
720985
* TOAST tuples -- are eligible for pruning.
986+
*
987+
* Sets *xmin_commit_status_ok to true if the commit status of xmin is known
988+
and false otherwise. If it's set to true, then also set *xmin_commit_status
989+
* to the actual commit status.
721990
*/
722991
static bool
723-
check_tuple_visibility(HeapCheckContext *ctx)
992+
check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
993+
XidCommitStatus *xmin_commit_status)
724994
{
725995
TransactionId xmin;
726996
TransactionId xvac;
@@ -731,13 +1001,17 @@ check_tuple_visibility(HeapCheckContext *ctx)
7311001
HeapTupleHeader tuphdr = ctx->tuphdr;
7321002

7331003
ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */
1004+
*xmin_commit_status_ok = false; /* have not yet proven otherwise */
7341005

7351006
/* If xmin is normal, it should be within valid range */
7361007
xmin = HeapTupleHeaderGetXmin(tuphdr);
7371008
switch (get_xid_status(xmin, ctx, &xmin_status))
7381009
{
7391010
case XID_INVALID:
1011+
break;
7401012
case XID_BOUNDS_OK:
1013+
*xmin_commit_status_ok = true;
1014+
*xmin_commit_status = xmin_status;
7411015
break;
7421016
case XID_IN_FUTURE:
7431017
report_corruption(ctx,
@@ -1515,9 +1789,13 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
15151789
/*
15161790
* Check the current tuple as tracked in ctx, recording any corruption found in
15171791
* ctx->tupstore.
1792+
*
1793+
* We return some information about the status of xmin to aid in validating
1794+
* update chains.
15181795
*/
15191796
static void
1520-
check_tuple(HeapCheckContext *ctx)
1797+
check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
1798+
XidCommitStatus *xmin_commit_status)
15211799
{
15221800
/*
15231801
* Check various forms of tuple header corruption, and if the header is
@@ -1531,7 +1809,8 @@ check_tuple(HeapCheckContext *ctx)
15311809
* cannot assume our relation description matches the tuple structure, and
15321810
* therefore cannot check it.
15331811
*/
1534-
if (!check_tuple_visibility(ctx))
1812+
if (!check_tuple_visibility(ctx, xmin_commit_status_ok,
1813+
xmin_commit_status))
15351814
return;
15361815

15371816
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy