Skip to content

Commit 5f1d931

Browse files
committed
Fix race condition between hot standby and restoring a full-page image.
There was a window in RestoreBackupBlock where a page would be zeroed out, but not yet locked. If a backend pinned and locked the page in that window, it saw the zeroed page instead of the old page or new page contents, which could lead to missing rows in a result set, or errors. To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins, zeroes, and locks the page, if it's not in the buffer cache already. In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE, to avoid breaking any 3rd party extensions that might use RBM_ZERO. More importantly, this avoids renumbering the other enum values, which would cause even bigger confusion in extensions that use ReadBufferExtended, but haven't been recompiled. Backpatch to all supported versions; this has been racy since hot standby was introduced.
1 parent 4ddd9e7 commit 5f1d931

File tree

6 files changed

+66
-25
lines changed

6 files changed

+66
-25
lines changed

src/backend/access/hash/hashpage.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,8 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
158158
if (blkno == P_NEW)
159159
elog(ERROR, "hash AM does not use P_NEW");
160160

161-
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
162-
163-
LockBuffer(buf, HASH_WRITE);
161+
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO_AND_LOCK,
162+
NULL);
164163

165164
/* ref count and lock type are correct */
166165

@@ -201,11 +200,13 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
201200
if (BufferGetBlockNumber(buf) != blkno)
202201
elog(ERROR, "unexpected hash relation size: %u, should be %u",
203202
BufferGetBlockNumber(buf), blkno);
203+
LockBuffer(buf, HASH_WRITE);
204204
}
205205
else
206-
buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
207-
208-
LockBuffer(buf, HASH_WRITE);
206+
{
207+
buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO_AND_LOCK,
208+
NULL);
209+
}
209210

210211
/* ref count and lock type are correct */
211212

src/backend/access/heap/heapam.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4367,9 +4367,8 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
43674367
* not do anything that assumes we are touching a heap.
43684368
*/
43694369
buffer = XLogReadBufferExtended(xlrec->node, xlrec->forknum, xlrec->blkno,
4370-
RBM_ZERO);
4370+
RBM_ZERO_AND_LOCK);
43714371
Assert(BufferIsValid(buffer));
4372-
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
43734372
page = (Page) BufferGetPage(buffer);
43744373

43754374
Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);

src/backend/access/transam/xlog.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3620,12 +3620,8 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
36203620
{
36213621
/* Found it, apply the update */
36223622
buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
3623-
RBM_ZERO);
3623+
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
36243624
Assert(BufferIsValid(buffer));
3625-
if (get_cleanup_lock)
3626-
LockBufferForCleanup(buffer);
3627-
else
3628-
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
36293625

36303626
page = (Page) BufferGetPage(buffer);
36313627

src/backend/access/transam/xlogutils.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -234,16 +234,17 @@ XLogCheckInvalidPages(void)
234234
* The returned buffer is exclusively-locked.
235235
*
236236
* For historical reasons, instead of a ReadBufferMode argument, this only
237-
* supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
237+
* supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
238+
* modes.
238239
*/
239240
Buffer
240241
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
241242
{
242243
Buffer buf;
243244

244245
buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
245-
init ? RBM_ZERO : RBM_NORMAL);
246-
if (BufferIsValid(buf))
246+
init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
247+
if (BufferIsValid(buf) && !init)
247248
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
248249

249250
return buf;
@@ -262,8 +263,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
262263
* dropped or truncated. If we don't see evidence of that later in the WAL
263264
* sequence, we'll complain at the end of WAL replay.)
264265
*
265-
* In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
266-
* relation is extended with all-zeroes pages up to the given block number.
266+
* In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
267+
* with all-zeroes pages up to the given block number.
267268
*
268269
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
269270
* exist, and we don't check for all-zeroes. Thus, no log entry is made
@@ -317,14 +318,20 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
317318
do
318319
{
319320
if (buffer != InvalidBuffer)
321+
{
322+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
323+
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
320324
ReleaseBuffer(buffer);
325+
}
321326
buffer = ReadBufferWithoutRelcache(rnode, forknum,
322327
P_NEW, mode, NULL);
323328
}
324329
while (BufferGetBlockNumber(buffer) < blkno);
325330
/* Handle the corner case that P_NEW returns non-consecutive pages */
326331
if (BufferGetBlockNumber(buffer) != blkno)
327332
{
333+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
334+
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
328335
ReleaseBuffer(buffer);
329336
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
330337
mode, NULL);

src/backend/storage/buffer/bufmgr.c

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,14 +208,19 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
208208
* valid, the page is zeroed instead of throwing an error. This is intended
209209
* for non-critical data, where the caller is prepared to repair errors.
210210
*
211-
* In RBM_ZERO mode, if the page isn't in buffer cache already, it's filled
212-
* with zeros instead of reading it from disk. Useful when the caller is
213-
* going to fill the page from scratch, since this saves I/O and avoids
211+
* In RBM_ZERO_AND_LOCK mode, if the page isn't in buffer cache already, it's
212+
* filled with zeros instead of reading it from disk. Useful when the caller
213+
* is going to fill the page from scratch, since this saves I/O and avoids
214214
* unnecessary failure if the page-on-disk has corrupt page headers.
215+
* The page is returned locked to ensure that the caller has a chance to
216+
* initialize the page before it's made visible to others.
215217
* Caution: do not use this mode to read a page that is beyond the relation's
216218
* current physical EOF; that is likely to cause problems in md.c when
217219
* the page is modified and written out. P_NEW is OK, though.
218220
*
221+
* RBM_ZERO_AND_CLEANUP_LOCK is the same as RBM_ZERO_AND_LOCK, but acquires
222+
* a cleanup-strength lock on the page.
223+
*
219224
* RBM_NORMAL_NO_LOG mode is treated the same as RBM_NORMAL here.
220225
*
221226
* If strategy is not NULL, a nondefault buffer access strategy is used.
@@ -356,6 +361,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
356361
isExtend,
357362
found);
358363

364+
/*
365+
* In RBM_ZERO_AND_LOCK or RBM_ZERO_AND_CLEANUP_LOCK mode, the caller expects the buffer to
366+
* be already locked on return.
367+
*/
368+
if (!isLocalBuf)
369+
{
370+
if (mode == RBM_ZERO_AND_LOCK)
371+
LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
372+
else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
373+
LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
374+
}
375+
359376
return BufferDescriptorGetBuffer(bufHdr);
360377
}
361378

@@ -436,8 +453,11 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
436453
* Read in the page, unless the caller intends to overwrite it and
437454
* just wants us to allocate a buffer.
438455
*/
439-
if (mode == RBM_ZERO)
456+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK ||
457+
mode == RBM_DO_NOT_USE)
458+
{
440459
MemSet((char *) bufBlock, 0, BLCKSZ);
460+
}
441461
else
442462
{
443463
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
@@ -464,6 +484,19 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
464484
}
465485
}
466486

487+
/*
488+
* In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
489+
* the page as valid, to make sure that no other backend sees the zeroed
490+
* page before the caller has had a chance to initialize it.
491+
*
492+
* Since no-one else can be looking at the page contents yet, there is no
493+
* difference between an exclusive lock and a cleanup-strength lock.
494+
* (Note that we cannot use LockBuffer() or LockBufferForCleanup() here,
495+
* because they assert that the buffer is already valid.)
496+
*/
497+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
498+
LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
499+
467500
if (isLocalBuf)
468501
{
469502
/* Only need to adjust flags */

src/include/storage/bufmgr.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,16 @@ typedef enum BufferAccessStrategyType
3636
typedef enum
3737
{
3838
RBM_NORMAL, /* Normal read */
39-
RBM_ZERO, /* Don't read from disk, caller will
40-
* initialize */
39+
RBM_DO_NOT_USE, /* This used to be RBM_ZERO. Only kept for
40+
* binary compatibility with 3rd party
41+
* extensions. */
4142
RBM_ZERO_ON_ERROR, /* Read, but return an all-zeros page on error */
42-
RBM_NORMAL_NO_LOG /* Don't log page as invalid during WAL
43+
RBM_NORMAL_NO_LOG, /* Don't log page as invalid during WAL
4344
* replay; otherwise same as RBM_NORMAL */
45+
RBM_ZERO_AND_LOCK, /* Don't read from disk, caller will
46+
* initialize. Also locks the page. */
47+
RBM_ZERO_AND_CLEANUP_LOCK /* Like RBM_ZERO_AND_LOCK, but locks the page
48+
* in "cleanup" mode */
4449
} ReadBufferMode;
4550

4651
/* in globals.c ... this duplicates miscadmin.h */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy