Skip to content

Commit 1e616f6

Browse files
committed
During recovery, if we reach a consistent state and still have entries in the invalid-page hash table, PANIC immediately.

An immediate PANIC is much better than waiting for the end of recovery, which is what we did before, because the end of recovery might not come until months later if this is a standby server. Also refrain from creating a restartpoint if there are invalid-page entries in the hash table. Restarting recovery from such a restartpoint would not see the invalid references, and wouldn't be able to cross-check them when consistency is reached. That wouldn't matter when things are going smoothly, but the more sanity checks you have the better.

Author: Fujii Masao
1 parent 15a5006 commit 1e616f6

File tree

4 files changed

+70
-28
lines changed

4 files changed

+70
-28
lines changed

src/backend/access/transam/xlog.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ static TimeLineID lastPageTLI = 0;
562562
static XLogRecPtr minRecoveryPoint; /* local copy of
563563
* ControlFile->minRecoveryPoint */
564564
static bool updateMinRecoveryPoint = true;
565-
static bool reachedMinRecoveryPoint = false;
565+
bool reachedMinRecoveryPoint = false;
566566

567567
static bool InRedo = false;
568568

@@ -6758,12 +6758,6 @@ StartupXLOG(void)
67586758
/* Disallow XLogInsert again */
67596759
LocalXLogInsertAllowed = -1;
67606760

6761-
/*
6762-
* Check to see if the XLOG sequence contained any unresolved
6763-
* references to uninitialized pages.
6764-
*/
6765-
XLogCheckInvalidPages();
6766-
67676761
/*
67686762
* Perform a checkpoint to update all our recovery activity to disk.
67696763
*
@@ -6906,6 +6900,12 @@ CheckRecoveryConsistency(void)
69066900
XLByteLE(minRecoveryPoint, EndRecPtr) &&
69076901
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
69086902
{
6903+
/*
6904+
* Check to see if the XLOG sequence contained any unresolved
6905+
* references to uninitialized pages.
6906+
*/
6907+
XLogCheckInvalidPages();
6908+
69096909
reachedMinRecoveryPoint = true;
69106910
ereport(LOG,
69116911
(errmsg("consistent recovery state reached at %X/%X",
@@ -7907,7 +7907,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
79077907
volatile XLogCtlData *xlogctl = XLogCtl;
79087908

79097909
/*
7910-
* Is it safe to checkpoint? We must ask each of the resource managers
7910+
* Is it safe to restartpoint? We must ask each of the resource managers
79117911
* whether they have any partial state information that might prevent a
79127912
* correct restart from this point. If so, we skip this opportunity, but
79137913
* return at the next checkpoint record for another try.
@@ -7926,6 +7926,22 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
79267926
}
79277927
}
79287928

7929+
/*
7930+
* Also refrain from creating a restartpoint if we have seen any references
7931+
* to non-existent pages. Restarting recovery from the restartpoint would
7932+
* not see the references, so we would lose the cross-check that the pages
7933+
* belonged to a relation that was dropped later.
7934+
*/
7935+
if (XLogHaveInvalidPages())
7936+
{
7937+
elog(trace_recovery(DEBUG2),
7938+
"could not record restart point at %X/%X because there "
7939+
"are unresolved references to invalid pages",
7940+
checkPoint->redo.xlogid,
7941+
checkPoint->redo.xrecoff);
7942+
return;
7943+
}
7944+
79297945
/*
79307946
* Copy the checkpoint record to shared memory, so that checkpointer
79317947
* can work out the next time it wants to perform a restartpoint.

src/backend/access/transam/xlogutils.c

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,22 @@ typedef struct xl_invalid_page
5252
static HTAB *invalid_page_tab = NULL;
5353

5454

55+
/* Report a reference to an invalid page */
56+
static void
57+
report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno,
58+
BlockNumber blkno, bool present)
59+
{
60+
char *path = relpathperm(node, forkno);
61+
62+
if (present)
63+
elog(elevel, "page %u of relation %s is uninitialized",
64+
blkno, path);
65+
else
66+
elog(elevel, "page %u of relation %s does not exist",
67+
blkno, path);
68+
pfree(path);
69+
}
70+
5571
/* Log a reference to an invalid page */
5672
static void
5773
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
@@ -61,23 +77,27 @@ log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
6177
xl_invalid_page *hentry;
6278
bool found;
6379

80+
/*
81+
* Once recovery has reached a consistent state, the invalid-page table
82+
* should be empty and remain so. If a reference to an invalid page is
83+
* found after consistency is reached, PANIC immediately. This might
84+
* seem aggressive, but it's better than letting the invalid reference
85+
* linger in the hash table until the end of recovery and PANIC there,
86+
* which might come only much later if this is a standby server.
87+
*/
88+
if (reachedMinRecoveryPoint)
89+
{
90+
report_invalid_page(WARNING, node, forkno, blkno, present);
91+
elog(PANIC, "WAL contains references to invalid pages");
92+
}
93+
6494
/*
6595
* Log references to invalid pages at DEBUG1 level. This allows some
6696
* tracing of the cause (note the elog context mechanism will tell us
6797
* something about the XLOG record that generated the reference).
6898
*/
6999
if (log_min_messages <= DEBUG1 || client_min_messages <= DEBUG1)
70-
{
71-
char *path = relpathperm(node, forkno);
72-
73-
if (present)
74-
elog(DEBUG1, "page %u of relation %s is uninitialized",
75-
blkno, path);
76-
else
77-
elog(DEBUG1, "page %u of relation %s does not exist",
78-
blkno, path);
79-
pfree(path);
80-
}
100+
report_invalid_page(DEBUG1, node, forkno, blkno, present);
81101

82102
if (invalid_page_tab == NULL)
83103
{
@@ -181,6 +201,16 @@ forget_invalid_pages_db(Oid dbid)
181201
}
182202
}
183203

204+
/* Are there any unresolved references to invalid pages? */
205+
bool
206+
XLogHaveInvalidPages(void)
207+
{
208+
if (invalid_page_tab != NULL &&
209+
hash_get_num_entries(invalid_page_tab) > 0)
210+
return true;
211+
return false;
212+
}
213+
184214
/* Complain about any remaining invalid-page entries */
185215
void
186216
XLogCheckInvalidPages(void)
@@ -200,15 +230,8 @@ XLogCheckInvalidPages(void)
200230
*/
201231
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
202232
{
203-
char *path = relpathperm(hentry->key.node, hentry->key.forkno);
204-
205-
if (hentry->present)
206-
elog(WARNING, "page %u of relation %s was uninitialized",
207-
hentry->key.blkno, path);
208-
else
209-
elog(WARNING, "page %u of relation %s did not exist",
210-
hentry->key.blkno, path);
211-
pfree(path);
233+
report_invalid_page(WARNING, hentry->key.node, hentry->key.forkno,
234+
hentry->key.blkno, hentry->present);
212235
foundone = true;
213236
}
214237

src/include/access/xlog.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,8 @@ typedef enum
190190

191191
extern XLogRecPtr XactLastRecEnd;
192192

193+
extern bool reachedMinRecoveryPoint;
194+
193195
/* these variables are GUC parameters related to XLOG */
194196
extern int CheckPointSegments;
195197
extern int wal_keep_segments;

src/include/access/xlogutils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "storage/bufmgr.h"
1515

1616

17+
extern bool XLogHaveInvalidPages(void);
1718
extern void XLogCheckInvalidPages(void);
1819

1920
extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy