Skip to content

Commit 8582b4d

Browse files
committed
Improve handling of corrupted two-phase state files at recovery
When a corrupted two-phase state file is found by WAL replay, be it for crash recovery or archive recovery, the file is simply skipped and a WARNING is logged to the user, causing the transaction to be silently lost. Facing an on-disk WAL file which is corrupted is as likely to happen as what is stored in WAL records, but WAL records are already able to fail hard if there is a CRC mismatch. On-disk two-phase state files, on the contrary, are simply ignored if corrupted. Note that when restoring the initial two-phase data state at recovery, files newer than the horizon XID are discarded, hence no file present in pg_twophase/ should be torn, as all of them have been made durable by a previous checkpoint, so recovery should never see any corrupted two-phase state file by design. The situation has improved since 978b2f6, which added two-phase state information directly in WAL instead of using on-disk files, so the risk is limited to two-phase transactions which live across at least one checkpoint for long periods. Backups having legitimate two-phase state files on disk could also silently lose transactions when restored if things get corrupted. This behavior has existed since two-phase commit was introduced; no back-patch is done for now, given the lack of complaints about this problem. Author: Michael Paquier Discussion: https://postgr.es/m/20180709050309.GM1467@paquier.xyz
1 parent 7b6b167 commit 8582b4d

File tree

1 file changed

+59
-80
lines changed

1 file changed

+59
-80
lines changed

src/backend/access/transam/twophase.c

Lines changed: 59 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,10 +1207,12 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
12071207
* Read and validate the state file for xid.
12081208
*
12091209
* If it looks OK (has a valid magic number and CRC), return the palloc'd
1210-
* contents of the file. Otherwise return NULL.
1210+
* contents of the file, issuing an error when finding corrupted data. If
1211+
* missing_ok is true, which indicates that missing files can be safely
1212+
* ignored, then return NULL if the file does not exist. This state can be reached when doing recovery.
12111213
*/
12121214
static char *
1213-
ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
1215+
ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
12141216
{
12151217
char path[MAXPGPATH];
12161218
char *buf;
@@ -1227,11 +1229,12 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12271229
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
12281230
if (fd < 0)
12291231
{
1230-
if (give_warnings)
1231-
ereport(WARNING,
1232-
(errcode_for_file_access(),
1233-
errmsg("could not open file \"%s\": %m", path)));
1234-
return NULL;
1232+
if (missing_ok && errno == ENOENT)
1233+
return NULL;
1234+
1235+
ereport(ERROR,
1236+
(errcode_for_file_access(),
1237+
errmsg("could not open file \"%s\": %m", path)));
12351238
}
12361239

12371240
/*
@@ -1241,35 +1244,27 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12411244
* even on a valid file.
12421245
*/
12431246
if (fstat(fd, &stat))
1244-
{
1245-
int save_errno = errno;
1246-
1247-
CloseTransientFile(fd);
1248-
if (give_warnings)
1249-
{
1250-
errno = save_errno;
1251-
ereport(WARNING,
1252-
(errcode_for_file_access(),
1253-
errmsg("could not stat file \"%s\": %m", path)));
1254-
}
1255-
return NULL;
1256-
}
1247+
ereport(ERROR,
1248+
(errcode_for_file_access(),
1249+
errmsg("could not stat file \"%s\": %m", path)));
12571250

12581251
if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
12591252
MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
12601253
sizeof(pg_crc32c)) ||
12611254
stat.st_size > MaxAllocSize)
1262-
{
1263-
CloseTransientFile(fd);
1264-
return NULL;
1265-
}
1255+
ereport(ERROR,
1256+
(errcode(ERRCODE_DATA_CORRUPTED),
1257+
errmsg_plural("incorrect size of file \"%s\": %zu byte",
1258+
"incorrect size of file \"%s\": %zu bytes",
1259+
(Size) stat.st_size, path,
1260+
(Size) stat.st_size)));
12661261

12671262
crc_offset = stat.st_size - sizeof(pg_crc32c);
12681263
if (crc_offset != MAXALIGN(crc_offset))
1269-
{
1270-
CloseTransientFile(fd);
1271-
return NULL;
1272-
}
1264+
ereport(ERROR,
1265+
(errcode(ERRCODE_DATA_CORRUPTED),
1266+
errmsg("incorrect alignment of CRC offset for file \"%s\"",
1267+
path)));
12731268

12741269
/*
12751270
* OK, slurp in the file.
@@ -1280,37 +1275,31 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12801275
r = read(fd, buf, stat.st_size);
12811276
if (r != stat.st_size)
12821277
{
1283-
int save_errno = errno;
1284-
1285-
pgstat_report_wait_end();
1286-
CloseTransientFile(fd);
1287-
if (give_warnings)
1288-
{
1289-
if (r < 0)
1290-
{
1291-
errno = save_errno;
1292-
ereport(WARNING,
1293-
(errcode_for_file_access(),
1294-
errmsg("could not read file \"%s\": %m", path)));
1295-
}
1296-
else
1297-
ereport(WARNING,
1298-
(errmsg("could not read file \"%s\": read %d of %zu",
1299-
path, r, (Size) stat.st_size)));
1300-
}
1301-
pfree(buf);
1302-
return NULL;
1278+
if (r < 0)
1279+
ereport(ERROR,
1280+
(errcode_for_file_access(),
1281+
errmsg("could not read file \"%s\": %m", path)));
1282+
else
1283+
ereport(ERROR,
1284+
(errmsg("could not read file \"%s\": read %d of %zu",
1285+
path, r, (Size) stat.st_size)));
13031286
}
13041287

13051288
pgstat_report_wait_end();
13061289
CloseTransientFile(fd);
13071290

13081291
hdr = (TwoPhaseFileHeader *) buf;
1309-
if (hdr->magic != TWOPHASE_MAGIC || hdr->total_len != stat.st_size)
1310-
{
1311-
pfree(buf);
1312-
return NULL;
1313-
}
1292+
if (hdr->magic != TWOPHASE_MAGIC)
1293+
ereport(ERROR,
1294+
(errcode(ERRCODE_DATA_CORRUPTED),
1295+
errmsg("invalid magic number stored in file \"%s\"",
1296+
path)));
1297+
1298+
if (hdr->total_len != stat.st_size)
1299+
ereport(ERROR,
1300+
(errcode(ERRCODE_DATA_CORRUPTED),
1301+
errmsg("invalid size stored in file \"%s\"",
1302+
path)));
13141303

13151304
INIT_CRC32C(calc_crc);
13161305
COMP_CRC32C(calc_crc, buf, crc_offset);
@@ -1319,10 +1308,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
13191308
file_crc = *((pg_crc32c *) (buf + crc_offset));
13201309

13211310
if (!EQ_CRC32C(calc_crc, file_crc))
1322-
{
1323-
pfree(buf);
1324-
return NULL;
1325-
}
1311+
ereport(ERROR,
1312+
(errcode(ERRCODE_DATA_CORRUPTED),
1313+
errmsg("calculated CRC checksum does not match value stored in file \"%s\"",
1314+
path)));
13261315

13271316
return buf;
13281317
}
@@ -1431,7 +1420,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
14311420
return false; /* nothing to do */
14321421

14331422
/* Read and validate file */
1434-
buf = ReadTwoPhaseFile(xid, false);
1423+
buf = ReadTwoPhaseFile(xid, true);
14351424
if (buf == NULL)
14361425
return false;
14371426

@@ -1479,7 +1468,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
14791468
* to disk if for some reason they have lived for a long time.
14801469
*/
14811470
if (gxact->ondisk)
1482-
buf = ReadTwoPhaseFile(xid, true);
1471+
buf = ReadTwoPhaseFile(xid, false);
14831472
else
14841473
XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
14851474

@@ -1874,6 +1863,10 @@ restoreTwoPhaseData(void)
18741863
* write a WAL entry, and so there might be no evidence in WAL of those
18751864
* subxact XIDs.
18761865
*
1866+
* On corrupted two-phase files, fail immediately. Keeping around broken
1867+
* entries and letting replay continue causes harm on the system, and a new
1868+
* backup should be rolled in.
1869+
*
18771870
* Our other responsibility is to determine and return the oldest valid XID
18781871
* among the prepared xacts (if none, return ShmemVariableCache->nextXid).
18791872
* This is needed to synchronize pg_subtrans startup properly.
@@ -2164,15 +2157,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
21642157
if (fromdisk)
21652158
{
21662159
/* Read and validate file */
2167-
buf = ReadTwoPhaseFile(xid, true);
2168-
if (buf == NULL)
2169-
{
2170-
ereport(WARNING,
2171-
(errmsg("removing corrupt two-phase state file for transaction %u",
2172-
xid)));
2173-
RemoveTwoPhaseFile(xid, true);
2174-
return NULL;
2175-
}
2160+
buf = ReadTwoPhaseFile(xid, false);
21762161
}
21772162
else
21782163
{
@@ -2185,21 +2170,15 @@ ProcessTwoPhaseBuffer(TransactionId xid,
21852170
if (!TransactionIdEquals(hdr->xid, xid))
21862171
{
21872172
if (fromdisk)
2188-
{
2189-
ereport(WARNING,
2190-
(errmsg("removing corrupt two-phase state file for transaction %u",
2173+
ereport(ERROR,
2174+
(errcode(ERRCODE_DATA_CORRUPTED),
2175+
errmsg("corrupted two-phase state file for transaction \"%u\"",
21912176
xid)));
2192-
RemoveTwoPhaseFile(xid, true);
2193-
}
21942177
else
2195-
{
2196-
ereport(WARNING,
2197-
(errmsg("removing corrupt two-phase state from memory for transaction %u",
2178+
ereport(ERROR,
2179+
(errcode(ERRCODE_DATA_CORRUPTED),
2180+
errmsg("corrupted two-phase state in memory for transaction \"%u\"",
21982181
xid)));
2199-
PrepareRedoRemove(xid, true);
2200-
}
2201-
pfree(buf);
2202-
return NULL;
22032182
}
22042183

22052184
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy