Content-Length: 665670 | pFad | http://github.com/postgrespro/postgres/commit/5f1d931cf3fd3653c3f11835b4aa1dc04edb63ad

Fix race condition between hot standby and restoring a full-page image. · postgrespro/postgres@5f1d931 · GitHub
Skip to content

Commit 5f1d931

Browse files
committed
Fix race condition between hot standby and restoring a full-page image.
There was a window in RestoreBackupBlock where a page would be zeroed out, but not yet locked. If a backend pinned and locked the page in that window, it saw the zeroed page instead of the old page or new page contents, which could lead to missing rows in a result set, or errors.

To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins, zeroes, and locks the page, if it's not in the buffer cache already.

In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE, to avoid breaking any 3rd party extensions that might use RBM_ZERO. More importantly, this avoids renumbering the other enum values, which would cause even bigger confusion in extensions that use ReadBufferExtended, but haven't been recompiled.

Backpatch to all supported versions; this has been racy since hot standby was introduced.
1 parent 4ddd9e7 commit 5f1d931

File tree

6 files changed

+66
-25
lines changed

6 files changed

+66
-25
lines changed

src/backend/access/hash/hashpage.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,8 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
158158
if (blkno == P_NEW)
159159
elog(ERROR, "hash AM does not use P_NEW");
160160

161-
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO, NULL);
162-
163-
LockBuffer(buf, HASH_WRITE);
161+
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_ZERO_AND_LOCK,
162+
NULL);
164163

165164
/* ref count and lock type are correct */
166165

@@ -201,11 +200,13 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
201200
if (BufferGetBlockNumber(buf) != blkno)
202201
elog(ERROR, "unexpected hash relation size: %u, should be %u",
203202
BufferGetBlockNumber(buf), blkno);
203+
LockBuffer(buf, HASH_WRITE);
204204
}
205205
else
206-
buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO, NULL);
207-
208-
LockBuffer(buf, HASH_WRITE);
206+
{
207+
buf = ReadBufferExtended(rel, forkNum, blkno, RBM_ZERO_AND_LOCK,
208+
NULL);
209+
}
209210

210211
/* ref count and lock type are correct */
211212

src/backend/access/heap/heapam.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4367,9 +4367,8 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
43674367
* not do anything that assumes we are touching a heap.
43684368
*/
43694369
buffer = XLogReadBufferExtended(xlrec->node, xlrec->forknum, xlrec->blkno,
4370-
RBM_ZERO);
4370+
RBM_ZERO_AND_LOCK);
43714371
Assert(BufferIsValid(buffer));
4372-
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
43734372
page = (Page) BufferGetPage(buffer);
43744373

43754374
Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);

src/backend/access/transam/xlog.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3620,12 +3620,8 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
36203620
{
36213621
/* Found it, apply the update */
36223622
buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
3623-
RBM_ZERO);
3623+
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
36243624
Assert(BufferIsValid(buffer));
3625-
if (get_cleanup_lock)
3626-
LockBufferForCleanup(buffer);
3627-
else
3628-
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
36293625

36303626
page = (Page) BufferGetPage(buffer);
36313627

src/backend/access/transam/xlogutils.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -234,16 +234,17 @@ XLogCheckInvalidPages(void)
234234
* The returned buffer is exclusively-locked.
235235
*
236236
* For historical reasons, instead of a ReadBufferMode argument, this only
237-
* supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
237+
* supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
238+
* modes.
238239
*/
239240
Buffer
240241
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
241242
{
242243
Buffer buf;
243244

244245
buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
245-
init ? RBM_ZERO : RBM_NORMAL);
246-
if (BufferIsValid(buf))
246+
init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
247+
if (BufferIsValid(buf) && !init)
247248
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
248249

249250
return buf;
@@ -262,8 +263,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
262263
* dropped or truncated. If we don't see evidence of that later in the WAL
263264
* sequence, we'll complain at the end of WAL replay.)
264265
*
265-
* In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
266-
* relation is extended with all-zeroes pages up to the given block number.
266+
* In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
267+
* with all-zeroes pages up to the given block number.
267268
*
268269
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
269270
* exist, and we don't check for all-zeroes. Thus, no log entry is made
@@ -317,14 +318,20 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
317318
do
318319
{
319320
if (buffer != InvalidBuffer)
321+
{
322+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
323+
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
320324
ReleaseBuffer(buffer);
325+
}
321326
buffer = ReadBufferWithoutRelcache(rnode, forknum,
322327
P_NEW, mode, NULL);
323328
}
324329
while (BufferGetBlockNumber(buffer) < blkno);
325330
/* Handle the corner case that P_NEW returns non-consecutive pages */
326331
if (BufferGetBlockNumber(buffer) != blkno)
327332
{
333+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
334+
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
328335
ReleaseBuffer(buffer);
329336
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
330337
mode, NULL);

src/backend/storage/buffer/bufmgr.c

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,14 +208,19 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
208208
* valid, the page is zeroed instead of throwing an error. This is intended
209209
* for non-critical data, where the caller is prepared to repair errors.
210210
*
211-
* In RBM_ZERO mode, if the page isn't in buffer cache already, it's filled
212-
* with zeros instead of reading it from disk. Useful when the caller is
213-
* going to fill the page from scratch, since this saves I/O and avoids
211+
* In RBM_ZERO_AND_LOCK mode, if the page isn't in buffer cache already, it's
212+
* filled with zeros instead of reading it from disk. Useful when the caller
213+
* is going to fill the page from scratch, since this saves I/O and avoids
214214
* unnecessary failure if the page-on-disk has corrupt page headers.
215+
* The page is returned locked to ensure that the caller has a chance to
216+
* initialize the page before it's made visible to others.
215217
* Caution: do not use this mode to read a page that is beyond the relation's
216218
* current physical EOF; that is likely to cause problems in md.c when
217219
* the page is modified and written out. P_NEW is OK, though.
218220
*
221+
* RBM_ZERO_AND_CLEANUP_LOCK is the same as RBM_ZERO_AND_LOCK, but acquires
222+
* a cleanup-strength lock on the page.
223+
*
219224
* RBM_NORMAL_NO_LOG mode is treated the same as RBM_NORMAL here.
220225
*
221226
* If strategy is not NULL, a nondefault buffer access strategy is used.
@@ -356,6 +361,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
356361
isExtend,
357362
found);
358363

364+
/*
365+
* In RBM_ZERO_AND_LOCK mode, the caller expects the buffer to
366+
* be already locked on return.
367+
*/
368+
if (!isLocalBuf)
369+
{
370+
if (mode == RBM_ZERO_AND_LOCK)
371+
LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
372+
else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
373+
LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
374+
}
375+
359376
return BufferDescriptorGetBuffer(bufHdr);
360377
}
361378

@@ -436,8 +453,11 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
436453
* Read in the page, unless the caller intends to overwrite it and
437454
* just wants us to allocate a buffer.
438455
*/
439-
if (mode == RBM_ZERO)
456+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK ||
457+
mode == RBM_DO_NOT_USE)
458+
{
440459
MemSet((char *) bufBlock, 0, BLCKSZ);
460+
}
441461
else
442462
{
443463
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
@@ -464,6 +484,19 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
464484
}
465485
}
466486

487+
/*
488+
* In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
489+
* the page as valid, to make sure that no other backend sees the zeroed
490+
* page before the caller has had a chance to initialize it.
491+
*
492+
* Since no-one else can be looking at the page contents yet, there is no
493+
* difference between an exclusive lock and a cleanup-strength lock.
494+
* (Note that we cannot use LockBuffer() or LockBufferForCleanup() here,
495+
* because they assert that the buffer is already valid.)
496+
*/
497+
if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK)
498+
LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
499+
467500
if (isLocalBuf)
468501
{
469502
/* Only need to adjust flags */

src/include/storage/bufmgr.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,16 @@ typedef enum BufferAccessStrategyType
3636
typedef enum
3737
{
3838
RBM_NORMAL, /* Normal read */
39-
RBM_ZERO, /* Don't read from disk, caller will
40-
* initialize */
39+
RBM_DO_NOT_USE, /* This used to be RBM_ZERO. Only kept for
40+
* binary compatibility with 3rd party
41+
* extensions. */
4142
RBM_ZERO_ON_ERROR, /* Read, but return an all-zeros page on error */
42-
RBM_NORMAL_NO_LOG /* Don't log page as invalid during WAL
43+
RBM_NORMAL_NO_LOG, /* Don't log page as invalid during WAL
4344
* replay; otherwise same as RBM_NORMAL */
45+
RBM_ZERO_AND_LOCK, /* Don't read from disk, caller will
46+
* initialize. Also locks the page. */
47+
RBM_ZERO_AND_CLEANUP_LOCK /* Like RBM_ZERO_AND_LOCK, but locks the page
48+
* in "cleanup" mode */
4449
} ReadBufferMode;
4550

4651
/* in globals.c ... this duplicates miscadmin.h */

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/postgrespro/postgres/commit/5f1d931cf3fd3653c3f11835b4aa1dc04edb63ad

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy