Skip to content

Commit 8e7e672

Browse files
committed
WAL-log inplace update before revealing it to other sessions.
A buffer lock won't stop a reader having already checked tuple visibility. If a vac_update_datfrozenid() and then a crash happened during inplace update of a relfrozenxid value, datfrozenxid could overtake relfrozenxid. That could lead to "could not access status of transaction" errors. Back-patch to v12 (all supported versions). In v14 and earlier, this also back-patches the assertion removal from commit 7fcf2fa. Discussion: https://postgr.es/m/20240620012908.92.nmisch@google.com
1 parent 243e9b4 commit 8e7e672

File tree

2 files changed

+46
-16
lines changed

2 files changed

+46
-16
lines changed

src/backend/access/heap/README.tuplock

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,4 @@ Inplace updates create an exception to the rule that tuple data won't change
203203
under a reader holding a pin. A reader of a heap_fetch() result tuple may
204204
witness a torn read. Current inplace-updated fields are aligned and are no
205205
wider than four bytes, and current readers don't need consistency across
206-
fields. Hence, they get by with just fetching each field once. XXX such a
207-
caller may also read a value that has not reached WAL; see
208-
systable_inplace_update_finish().
206+
fields. Hence, they get by with just fetching each field once.

src/backend/access/heap/heapam.c

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6326,6 +6326,8 @@ heap_inplace_update_and_unlock(Relation relation,
63266326
HeapTupleHeader htup = oldtup->t_data;
63276327
uint32 oldlen;
63286328
uint32 newlen;
6329+
char *dst;
6330+
char *src;
63296331
int nmsgs = 0;
63306332
SharedInvalidationMessage *invalMessages = NULL;
63316333
bool RelcacheInitFileInval = false;
@@ -6336,6 +6338,9 @@ heap_inplace_update_and_unlock(Relation relation,
63366338
if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
63376339
elog(ERROR, "wrong tuple length");
63386340

6341+
dst = (char *) htup + htup->t_hoff;
6342+
src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6343+
63396344
/*
63406345
* Construct shared cache inval if necessary. Note that because we only
63416346
* pass the new version of the tuple, this mustn't be used for any
@@ -6359,15 +6364,15 @@ heap_inplace_update_and_unlock(Relation relation,
63596364
*/
63606365
PreInplace_Inval();
63616366

6362-
/* NO EREPORT(ERROR) from here till changes are logged */
6363-
START_CRIT_SECTION();
6364-
6365-
memcpy((char *) htup + htup->t_hoff,
6366-
(char *) tuple->t_data + tuple->t_data->t_hoff,
6367-
newlen);
6368-
63696367
/*----------
6370-
* XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6368+
* NO EREPORT(ERROR) from here till changes are complete
6369+
*
6370+
* Our buffer lock won't stop a reader having already pinned and checked
6371+
* visibility for this tuple. Hence, we write WAL first, then mutate the
6372+
* buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6373+
* checkpoint delay makes that acceptable. With the usual order of
6374+
* changes, a crash after memcpy() and before XLogInsert() could allow
6375+
* datfrozenxid to overtake relfrozenxid:
63716376
*
63726377
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
63736378
* ["R" is a VACUUM tbl]
@@ -6377,14 +6382,28 @@ heap_inplace_update_and_unlock(Relation relation,
63776382
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
63786383
* [crash]
63796384
* [recovery restores datfrozenxid w/o relfrozenxid]
6385+
*
6386+
* Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6387+
* the buffer to the stack before logging. Here, that facilitates a FPI
6388+
* of the post-mutation block before we accept other sessions seeing it.
63806389
*/
6381-
6382-
MarkBufferDirty(buffer);
6390+
Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
6391+
START_CRIT_SECTION();
6392+
MyProc->delayChkptFlags |= DELAY_CHKPT_START;
63836393

63846394
/* XLOG stuff */
63856395
if (RelationNeedsWAL(relation))
63866396
{
63876397
xl_heap_inplace xlrec;
6398+
PGAlignedBlock copied_buffer;
6399+
char *origdata = (char *) BufferGetBlock(buffer);
6400+
Page page = BufferGetPage(buffer);
6401+
uint16 lower = ((PageHeader) page)->pd_lower;
6402+
uint16 upper = ((PageHeader) page)->pd_upper;
6403+
uintptr_t dst_offset_in_block;
6404+
RelFileLocator rlocator;
6405+
ForkNumber forkno;
6406+
BlockNumber blkno;
63886407
XLogRecPtr recptr;
63896408

63906409
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
@@ -6399,16 +6418,28 @@ heap_inplace_update_and_unlock(Relation relation,
63996418
XLogRegisterData((char *) invalMessages,
64006419
nmsgs * sizeof(SharedInvalidationMessage));
64016420

6402-
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6403-
XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
6421+
/* register block matching what buffer will look like after changes */
6422+
memcpy(copied_buffer.data, origdata, lower);
6423+
memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6424+
dst_offset_in_block = dst - origdata;
6425+
memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6426+
BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6427+
Assert(forkno == MAIN_FORKNUM);
6428+
XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6429+
REGBUF_STANDARD);
6430+
XLogRegisterBufData(0, src, newlen);
64046431

64056432
/* inplace updates aren't decoded atm, don't log the origin */
64066433

64076434
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
64086435

6409-
PageSetLSN(BufferGetPage(buffer), recptr);
6436+
PageSetLSN(page, recptr);
64106437
}
64116438

6439+
memcpy(dst, src, newlen);
6440+
6441+
MarkBufferDirty(buffer);
6442+
64126443
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
64136444

64146445
/*
@@ -6421,6 +6452,7 @@ heap_inplace_update_and_unlock(Relation relation,
64216452
*/
64226453
AtInplace_Inval();
64236454

6455+
MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
64246456
END_CRIT_SECTION();
64256457
UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
64266458

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy