Skip to content

Commit 8c3cc86

Browse files
committed
During WAL recovery, when reading a page that we intend to overwrite completely
from the WAL data, don't bother to physically read it; just have bufmgr.c return a zeroed-out buffer instead. This speeds recovery significantly, and also avoids unnecessary failures when a page-to-be-overwritten has corrupt page headers on disk. This replaces a former kluge that accomplished the latter by pretending zero_damaged_pages was always ON during WAL recovery, which was OK when the kluge was put in, but is unsafe when restoring a WAL log that was written with full_page_writes off. Heikki Linnakangas
1 parent 8ec9438 commit 8c3cc86

File tree

3 files changed

+49
-13
lines changed

3 files changed

+49
-13
lines changed

src/backend/access/transam/xlogutils.c

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
1212
* Portions Copyright (c) 1994, Regents of the University of California
1313
*
14-
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.49 2007/01/05 22:19:24 momjian Exp $
14+
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.50 2007/05/02 23:18:03 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -206,7 +206,9 @@ XLogCheckInvalidPages(void)
206206
* If "init" is true then the caller intends to rewrite the page fully
207207
* using the info in the XLOG record. In this case we will extend the
208208
* relation if needed to make the page exist, and we will not complain about
209-
* the page being "new" (all zeroes).
209+
* the page being "new" (all zeroes); in fact, we usually will supply a
210+
* zeroed buffer without reading the page at all, so as to avoid unnecessary
211+
* failure if the page is present on disk but has corrupt headers.
210212
*
211213
* If "init" is false then the caller needs the page to be valid already.
212214
* If the page doesn't exist or contains zeroes, we return InvalidBuffer.
@@ -226,7 +228,10 @@ XLogReadBuffer(Relation reln, BlockNumber blkno, bool init)
226228
if (blkno < lastblock)
227229
{
228230
/* page exists in file */
229-
buffer = ReadBuffer(reln, blkno);
231+
if (init)
232+
buffer = ReadOrZeroBuffer(reln, blkno);
233+
else
234+
buffer = ReadBuffer(reln, blkno);
230235
}
231236
else
232237
{

src/backend/storage/buffer/bufmgr.c

Lines changed: 39 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.216 2007/03/30 18:34:55 mha Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.217 2007/05/02 23:18:03 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -17,6 +17,12 @@
1717
* and pin it so that no one can destroy it while this process
1818
* is using it.
1919
*
20+
* ReadOrZeroBuffer() -- like ReadBuffer, but if the page is not already in
21+
* cache we don't read it, but just return a zeroed-out buffer. Useful
22+
* when the caller intends to fill the page from scratch, since this
23+
* saves I/O and avoids unnecessary failure if the page-on-disk has
24+
* corrupt page headers.
25+
*
2026
* ReleaseBuffer() -- unpin a buffer
2127
*
2228
* MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
@@ -87,6 +93,8 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
8793
extern PgStat_MsgBgWriter BgWriterStats;
8894

8995

96+
static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
97+
bool zeroPage);
9098
static bool PinBuffer(volatile BufferDesc *buf);
9199
static void PinBuffer_Locked(volatile BufferDesc *buf);
92100
static void UnpinBuffer(volatile BufferDesc *buf,
@@ -120,6 +128,27 @@ static void AtProcExit_Buffers(int code, Datum arg);
120128
*/
121129
Buffer
122130
ReadBuffer(Relation reln, BlockNumber blockNum)
131+
{
132+
return ReadBuffer_common(reln, blockNum, false);
133+
}
134+
135+
/*
136+
* ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't in buffer
137+
* cache already, it's filled with zeros instead of reading it from
138+
* disk. The caller is expected to overwrite the whole buffer,
139+
* so that the current page contents are not interesting.
140+
*/
141+
Buffer
142+
ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
143+
{
144+
return ReadBuffer_common(reln, blockNum, true);
145+
}
146+
147+
/*
148+
* ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
149+
*/
150+
static Buffer
151+
ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
123152
{
124153
volatile BufferDesc *bufHdr;
125154
Block bufBlock;
@@ -253,17 +282,18 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
253282
}
254283
else
255284
{
256-
smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
285+
/*
286+
* Read in the page, unless the caller intends to overwrite it
287+
* and just wants us to allocate a buffer.
288+
*/
289+
if (zeroPage)
290+
MemSet((char *) bufBlock, 0, BLCKSZ);
291+
else
292+
smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
257293
/* check for garbage data */
258294
if (!PageHeaderIsValid((PageHeader) bufBlock))
259295
{
260-
/*
261-
* During WAL recovery, the first access to any data page should
262-
* overwrite the whole page from the WAL; so a clobbered page
263-
* header is not reason to fail. Hence, when InRecovery we may
264-
* always act as though zero_damaged_pages is ON.
265-
*/
266-
if (zero_damaged_pages || InRecovery)
296+
if (zero_damaged_pages)
267297
{
268298
ereport(WARNING,
269299
(errcode(ERRCODE_DATA_CORRUPTED),

src/include/storage/bufmgr.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.102 2007/01/05 22:19:57 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -111,6 +111,7 @@ extern DLLIMPORT int32 *LocalRefCount;
111111
* prototypes for functions in bufmgr.c
112112
*/
113113
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
114+
extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
114115
extern void ReleaseBuffer(Buffer buffer);
115116
extern void UnlockReleaseBuffer(Buffer buffer);
116117
extern void MarkBufferDirty(Buffer buffer);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy