Skip to content

Commit 3dba9cb

Browse files
committed
Add a check on file size as an additional safety check that a WAL file
recovered from archive is not corrupt. It's not much, but it will catch one common problem, viz. running out of disk space. Also, force a WAL recovery scan when recovery.conf is present, even if pg_control shows a clean shutdown. This allows recovery with a tar backup that was taken with the postmaster shut down, as per a complaint from Mark Kirkwood.
1 parent 406e8aa commit 3dba9cb

File tree

1 file changed

+69
-23
lines changed
  • src/backend/access/transam

1 file changed

+69
-23
lines changed

src/backend/access/transam/xlog.c

Lines changed: 69 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.150 2004/07/21 22:31:20 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.151 2004/07/22 20:18:40 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -432,7 +432,7 @@ static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
432432
static int XLogFileOpen(uint32 log, uint32 seg);
433433
static int XLogFileRead(uint32 log, uint32 seg, int emode);
434434
static bool RestoreArchivedFile(char *path, const char *xlogfname,
435-
const char *recovername);
435+
const char *recovername, off_t expectedSize);
436436
static void PreallocXlogFiles(XLogRecPtr endptr);
437437
static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr);
438438
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer);
@@ -1838,7 +1838,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
18381838
{
18391839
XLogFileName(xlogfname, tli, log, seg);
18401840
restoredFromArchive = RestoreArchivedFile(path, xlogfname,
1841-
"RECOVERYXLOG");
1841+
"RECOVERYXLOG",
1842+
XLogSegSize);
18421843
}
18431844
else
18441845
XLogFilePath(path, tli, log, seg);
@@ -1876,10 +1877,14 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
18761877
* If not successful, fill "path" with the name of the normal on-line file
18771878
* (which may or may not actually exist, but we'll try to use it), and return
18781879
* FALSE.
1880+
*
1881+
* For fixed-size files, the caller may pass the expected size as an
1882+
* additional crosscheck on successful recovery. If the file size is not
1883+
* known, set expectedSize = 0.
18791884
*/
18801885
static bool
18811886
RestoreArchivedFile(char *path, const char *xlogfname,
1882-
const char *recovername)
1887+
const char *recovername, off_t expectedSize)
18831888
{
18841889
char xlogpath[MAXPGPATH];
18851890
char xlogRestoreCmd[MAXPGPATH];
@@ -1991,19 +1996,42 @@ RestoreArchivedFile(char *path, const char *xlogfname,
19911996
rc = system(xlogRestoreCmd);
19921997
if (rc == 0)
19931998
{
1994-
/* restore success ... assuming file is really there now ... */
1995-
if (stat(xlogpath, &stat_buf) == 0) {
1996-
ereport(LOG,
1997-
(errmsg("restored log file \"%s\" from archive",
1998-
xlogfname)));
1999-
strcpy(path, xlogpath);
2000-
return true;
1999+
/*
2000+
* command apparently succeeded, but let's make sure the file is
2001+
* really there now and has the correct size.
2002+
*
2003+
* XXX I made wrong-size a fatal error to ensure the DBA would
2004+
* notice it, but is that too strong? We could try to plow ahead
2005+
* with a local copy of the file ... but the problem is that there
2006+
* probably isn't one, and we'd incorrectly conclude we've reached
2007+
* the end of WAL and we're done recovering ...
2008+
*/
2009+
if (stat(xlogpath, &stat_buf) == 0)
2010+
{
2011+
if (expectedSize > 0 && stat_buf.st_size != expectedSize)
2012+
ereport(FATAL,
2013+
(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
2014+
xlogfname,
2015+
(unsigned long) stat_buf.st_size,
2016+
(unsigned long) expectedSize)));
2017+
else
2018+
{
2019+
ereport(LOG,
2020+
(errmsg("restored log file \"%s\" from archive",
2021+
xlogfname)));
2022+
strcpy(path, xlogpath);
2023+
return true;
2024+
}
2025+
}
2026+
else
2027+
{
2028+
/* stat failed */
2029+
if (errno != ENOENT)
2030+
ereport(FATAL,
2031+
(errcode_for_file_access(),
2032+
errmsg("could not stat \"%s\": %m",
2033+
xlogpath)));
20012034
}
2002-
if (errno != ENOENT)
2003-
ereport(FATAL,
2004-
(errcode_for_file_access(),
2005-
errmsg("could not stat \"%s\": %m",
2006-
xlogpath)));
20072035
}
20082036

20092037
/*
@@ -2664,7 +2692,7 @@ readTimeLineHistory(TimeLineID targetTLI)
26642692
if (InArchiveRecovery)
26652693
{
26662694
TLHistoryFileName(histfname, targetTLI);
2667-
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY");
2695+
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
26682696
}
26692697
else
26702698
TLHistoryFilePath(path, targetTLI);
@@ -2749,7 +2777,7 @@ existsTimeLineHistory(TimeLineID probeTLI)
27492777
if (InArchiveRecovery)
27502778
{
27512779
TLHistoryFileName(histfname, probeTLI);
2752-
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY");
2780+
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
27532781
}
27542782
else
27552783
TLHistoryFilePath(path, probeTLI);
@@ -2853,7 +2881,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
28532881
if (InArchiveRecovery)
28542882
{
28552883
TLHistoryFileName(histfname, parentTLI);
2856-
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY");
2884+
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
28572885
}
28582886
else
28592887
TLHistoryFilePath(path, parentTLI);
@@ -4042,6 +4070,11 @@ StartupXLOG(void)
40424070
if (checkPoint.undo.xrecoff == 0)
40434071
checkPoint.undo = RecPtr;
40444072

4073+
/*
4074+
* Check whether we need to force recovery from WAL. If it appears
4075+
* to have been a clean shutdown and we did not have a recovery.conf
4076+
* file, then assume no recovery needed.
4077+
*/
40454078
if (XLByteLT(checkPoint.undo, RecPtr) ||
40464079
XLByteLT(checkPoint.redo, RecPtr))
40474080
{
@@ -4054,13 +4087,23 @@ StartupXLOG(void)
40544087
InRecovery = true;
40554088

40564089
/* REDO */
4057-
if (InRecovery)
4090+
if (InRecovery || InArchiveRecovery)
40584091
{
40594092
int rmid;
40604093

4061-
ereport(LOG,
4062-
(errmsg("database system was not properly shut down; "
4063-
"automatic recovery in progress")));
4094+
if (InRecovery)
4095+
{
4096+
ereport(LOG,
4097+
(errmsg("database system was not properly shut down; "
4098+
"automatic recovery in progress")));
4099+
}
4100+
else
4101+
{
4102+
/* force recovery due to presence of recovery.conf */
4103+
InRecovery = true;
4104+
ereport(LOG,
4105+
(errmsg("automatic recovery in progress")));
4106+
}
40644107
ControlFile->state = DB_IN_RECOVERY;
40654108
ControlFile->time = time(NULL);
40664109
UpdateControlFile();
@@ -4158,8 +4201,11 @@ StartupXLOG(void)
41584201
InRedo = false;
41594202
}
41604203
else
4204+
{
4205+
/* there are no WAL records following the checkpoint */
41614206
ereport(LOG,
41624207
(errmsg("redo is not required")));
4208+
}
41634209
}
41644210

41654211
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy