Skip to content

Commit 68e605b

Browse files
committed
Rethink the delay-checkpoint-end mechanism in the back-branches.
The back-patch of commit bbace56 had the unfortunate effect of changing the layout of PGPROC in the back-branches, which could break extensions. This happened because it changed the delayChkpt from type bool to type int. So, change it back, and add a new bool delayChkptEnd field instead. The new field should fall within what used to be padding space within the struct, and so hopefully won't cause any extensions to break.

Per report from Markus Wanner and discussion with Tom Lane and others. Patch originally by me, somewhat revised by Markus Wanner per a suggestion from Michael Paquier. A very similar patch was developed by Kyotaro Horiguchi, but I failed to see the email in which that was posted before writing one of my own.

Discussion: http://postgr.es/m/CA+Tgmoao-kUD9c5nG5sub3F7tbo39+cdr8jKaOVEs_1aBWcJ3Q@mail.gmail.com
Discussion: http://postgr.es/m/20220406.164521.17171257901083417.horikyota.ntt@gmail.com
1 parent 5378d55 commit 68e605b

File tree

10 files changed

+114
-80
lines changed

10 files changed

+114
-80
lines changed

src/backend/access/transam/multixact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3069,8 +3069,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30693069
* crash/basebackup, even though the state of the data directory would
30703070
* require it.
30713071
*/
3072-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
3073-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
3072+
Assert(!MyPgXact->delayChkpt);
3073+
MyPgXact->delayChkpt = true;
30743074

30753075
/* WAL log truncation */
30763076
WriteMTruncateXlogRec(newOldestMultiDB,
@@ -3096,7 +3096,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30963096
/* Then offsets */
30973097
PerformOffsetsTruncation(oldestMulti, newOldestMulti);
30983098

3099-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
3099+
MyPgXact->delayChkpt = false;
31003100

31013101
END_CRIT_SECTION();
31023102
LWLockRelease(MultiXactTruncationLock);

src/backend/access/transam/twophase.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -477,8 +477,9 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
477477
}
478478
pgxact->xid = xid;
479479
pgxact->xmin = InvalidTransactionId;
480-
pgxact->delayChkpt = 0;
480+
pgxact->delayChkpt = false;
481481
pgxact->vacuumFlags = 0;
482+
proc->delayChkptEnd = false;
482483
proc->pid = 0;
483484
proc->databaseId = databaseid;
484485
proc->roleId = owner;
@@ -1187,8 +1188,8 @@ EndPrepare(GlobalTransaction gxact)
11871188

11881189
START_CRIT_SECTION();
11891190

1190-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
1191-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
1191+
Assert(!MyPgXact->delayChkpt);
1192+
MyPgXact->delayChkpt = true;
11921193

11931194
XLogBeginInsert();
11941195
for (record = records.head; record != NULL; record = record->next)
@@ -1231,7 +1232,7 @@ EndPrepare(GlobalTransaction gxact)
12311232
* checkpoint starting after this will certainly see the gxact as a
12321233
* candidate for fsyncing.
12331234
*/
1234-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
1235+
MyPgXact->delayChkpt = false;
12351236

12361237
/*
12371238
* Remember that we have this GlobalTransaction entry locked for us. If
@@ -2338,8 +2339,8 @@ RecordTransactionCommitPrepared(TransactionId xid,
23382339
START_CRIT_SECTION();
23392340

23402341
/* See notes in RecordTransactionCommit */
2341-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
2342-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
2342+
Assert(!MyPgXact->delayChkpt);
2343+
MyPgXact->delayChkpt = true;
23432344

23442345
/*
23452346
* Emit the XLOG commit record. Note that we mark 2PC commits as
@@ -2387,7 +2388,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
23872388
TransactionIdCommitTree(xid, nchildren, children);
23882389

23892390
/* Checkpoint can proceed now */
2390-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
2391+
MyPgXact->delayChkpt = false;
23912392

23922393
END_CRIT_SECTION();
23932394

src/backend/access/transam/xact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,9 +1306,9 @@ RecordTransactionCommit(void)
13061306
* This makes checkpoint's determination of which xacts are delayChkpt
13071307
* a bit fuzzy, but it doesn't matter.
13081308
*/
1309-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
1309+
Assert(!MyPgXact->delayChkpt);
13101310
START_CRIT_SECTION();
1311-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
1311+
MyPgXact->delayChkpt = true;
13121312

13131313
SetCurrentTransactionStopTimestamp();
13141314

@@ -1409,7 +1409,7 @@ RecordTransactionCommit(void)
14091409
*/
14101410
if (markXidCommitted)
14111411
{
1412-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
1412+
MyPgXact->delayChkpt = false;
14131413
END_CRIT_SECTION();
14141414
}
14151415

src/backend/access/transam/xlog.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8920,27 +8920,25 @@ CreateCheckPoint(int flags)
89208920
* and we will correctly flush the update below. So we cannot miss any
89218921
* xacts we need to wait for.
89228922
*/
8923-
vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_START);
8923+
vxids = GetVirtualXIDsDelayingChkpt(&nvxids);
89248924
if (nvxids > 0)
89258925
{
89268926
do
89278927
{
89288928
pg_usleep(10000L); /* wait for 10 msec */
8929-
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
8930-
DELAY_CHKPT_START));
8929+
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids));
89318930
}
89328931
pfree(vxids);
89338932

89348933
CheckPointGuts(checkPoint.redo, flags);
89358934

8936-
vxids = GetVirtualXIDsDelayingChkpt(&nvxids, DELAY_CHKPT_COMPLETE);
8935+
vxids = GetVirtualXIDsDelayingChkptEnd(&nvxids);
89378936
if (nvxids > 0)
89388937
{
89398938
do
89408939
{
89418940
pg_usleep(10000L); /* wait for 10 msec */
8942-
} while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
8943-
DELAY_CHKPT_COMPLETE));
8941+
} while (HaveVirtualXIDsDelayingChkptEnd(vxids, nvxids));
89448942
}
89458943
pfree(vxids);
89468944

src/backend/access/transam/xloginsert.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
899899
/*
900900
* Ensure no checkpoint can change our view of RedoRecPtr.
901901
*/
902-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) != 0);
902+
Assert(MyPgXact->delayChkpt);
903903

904904
/*
905905
* Update RedoRecPtr so that we can make the right decision

src/backend/catalog/storage.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,8 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
266266
* the blocks to not exist on disk at all, but not for them to have the
267267
* wrong contents.
268268
*/
269-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_COMPLETE) == 0);
270-
MyPgXact->delayChkpt |= DELAY_CHKPT_COMPLETE;
269+
Assert(!MyProc->delayChkptEnd);
270+
MyProc->delayChkptEnd = true;
271271

272272
/*
273273
* We WAL-log the truncation before actually truncating, which means
@@ -315,7 +315,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
315315
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
316316

317317
/* We've done all the critical work, so checkpoints are OK now. */
318-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_COMPLETE;
318+
MyProc->delayChkptEnd = false;
319319
}
320320

321321
/*

src/backend/storage/buffer/bufmgr.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3514,8 +3514,8 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
35143514
* essential that CreateCheckpoint waits for virtual transactions
35153515
* rather than full transactionids.
35163516
*/
3517-
Assert((MyPgXact->delayChkpt & DELAY_CHKPT_START) == 0);
3518-
MyPgXact->delayChkpt |= DELAY_CHKPT_START;
3517+
Assert(!MyPgXact->delayChkpt);
3518+
MyPgXact->delayChkpt = true;
35193519
delayChkpt = true;
35203520
lsn = XLogSaveBufferForHint(buffer, buffer_std);
35213521
}
@@ -3549,7 +3549,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
35493549
UnlockBufHdr(bufHdr, buf_state);
35503550

35513551
if (delayChkpt)
3552-
MyPgXact->delayChkpt &= ~DELAY_CHKPT_START;
3552+
MyPgXact->delayChkpt = false;
35533553

35543554
if (dirtied)
35553555
{

src/backend/storage/ipc/procarray.c

Lines changed: 75 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,11 @@ static void DisplayXidCache(void);
152152
#define xc_slow_answer_inc() ((void) 0)
153153
#endif /* XIDCACHE_DEBUG */
154154

155+
static VirtualTransactionId *GetVirtualXIDsDelayingChkptGuts(int *nvxids,
156+
int type);
157+
static bool HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids,
158+
int nvxids, int type);
159+
155160
/* Primitives for KnownAssignedXids array handling for standby */
156161
static void KnownAssignedXidsCompress(bool force);
157162
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
@@ -435,8 +440,9 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
435440
/* must be cleared with xid/xmin: */
436441
pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
437442

438-
/* be sure this is cleared in abort */
439-
pgxact->delayChkpt = 0;
443+
/* be sure these are cleared in abort */
444+
pgxact->delayChkpt = false;
445+
proc->delayChkptEnd = false;
440446

441447
proc->recoveryConflictPending = false;
442448

@@ -460,8 +466,9 @@ ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
460466
/* must be cleared with xid/xmin: */
461467
pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
462468

463-
/* be sure this is cleared in abort */
464-
pgxact->delayChkpt = 0;
469+
/* be sure these are cleared in abort */
470+
pgxact->delayChkpt = false;
471+
proc->delayChkptEnd = false;
465472

466473
proc->recoveryConflictPending = false;
467474

@@ -2263,26 +2270,28 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
22632270
}
22642271

22652272
/*
2266-
* GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2267-
* delaying checkpoint because they have critical actions in progress.
2273+
* GetVirtualXIDsDelayingChkptGuts -- Get the VXIDs of transactions that are
2274+
* delaying the start or end of a checkpoint because they have critical
2275+
* actions in progress.
22682276
*
22692277
* Constructs an array of VXIDs of transactions that are currently in commit
2270-
* critical sections, as shown by having specified delayChkpt bits set in their
2271-
* PGXACT.
2278+
* critical sections, as shown by having specified delayChkpt or delayChkptEnd
2279+
* set.
22722280
*
22732281
* Returns a palloc'd array that should be freed by the caller.
22742282
* *nvxids is the number of valid entries.
22752283
*
2276-
* Note that because backends set or clear delayChkpt without holding any lock,
2277-
* the result is somewhat indeterminate, but we don't really care. Even in
2278-
* a multiprocessor with delayed writes to shared memory, it should be certain
2279-
* that setting of delayChkpt will propagate to shared memory when the backend
2280-
* takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2281-
* it's already inserted its commit record. Whether it takes a little while
2282-
* for clearing of delayChkpt to propagate is unimportant for correctness.
2284+
* Note that because backends set or clear delayChkpt and delayChkptEnd
2285+
* without holding any lock, the result is somewhat indeterminate, but we
2286+
* don't really care. Even in a multiprocessor with delayed writes to
2287+
* shared memory, it should be certain that setting of delayChkpt will
2288+
* propagate to shared memory when the backend takes a lock, so we cannot
2289+
* fail to see a virtual xact as delayChkpt if it's already inserted its
2290+
* commit record. Whether it takes a little while for clearing of
2291+
* delayChkpt to propagate is unimportant for correctness.
22832292
*/
2284-
VirtualTransactionId *
2285-
GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
2293+
static VirtualTransactionId *
2294+
GetVirtualXIDsDelayingChkptGuts(int *nvxids, int type)
22862295
{
22872296
VirtualTransactionId *vxids;
22882297
ProcArrayStruct *arrayP = procArray;
@@ -2303,7 +2312,8 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
23032312
PGPROC *proc = &allProcs[pgprocno];
23042313
PGXACT *pgxact = &allPgXact[pgprocno];
23052314

2306-
if ((pgxact->delayChkpt & type) != 0)
2315+
if (((type & DELAY_CHKPT_START) && pgxact->delayChkpt) ||
2316+
((type & DELAY_CHKPT_COMPLETE) && proc->delayChkptEnd))
23072317
{
23082318
VirtualTransactionId vxid;
23092319

@@ -2319,6 +2329,26 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
23192329
return vxids;
23202330
}
23212331

2332+
/*
2333+
* GetVirtualXIDsDelayingChkpt - Get the VXIDs of transactions that are
2334+
* delaying the start of a checkpoint.
2335+
*/
2336+
VirtualTransactionId *
2337+
GetVirtualXIDsDelayingChkpt(int *nvxids)
2338+
{
2339+
return GetVirtualXIDsDelayingChkptGuts(nvxids, DELAY_CHKPT_START);
2340+
}
2341+
2342+
/*
2343+
* GetVirtualXIDsDelayingChkptEnd - Get the VXIDs of transactions that are
2344+
* delaying the end of a checkpoint.
2345+
*/
2346+
VirtualTransactionId *
2347+
GetVirtualXIDsDelayingChkptEnd(int *nvxids)
2348+
{
2349+
return GetVirtualXIDsDelayingChkptGuts(nvxids, DELAY_CHKPT_COMPLETE);
2350+
}
2351+
23222352
/*
23232353
* HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
23242354
*
@@ -2328,8 +2358,9 @@ GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
23282358
* Note: this is O(N^2) in the number of vxacts that are/were delaying, but
23292359
* those numbers should be small enough for it not to be a problem.
23302360
*/
2331-
bool
2332-
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
2361+
static bool
2362+
HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids, int nvxids,
2363+
int type)
23332364
{
23342365
bool result = false;
23352366
ProcArrayStruct *arrayP = procArray;
@@ -2348,7 +2379,8 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
23482379

23492380
GET_VXID_FROM_PGPROC(vxid, *proc);
23502381

2351-
if ((pgxact->delayChkpt & type) != 0 &&
2382+
if ((((type & DELAY_CHKPT_START) && pgxact->delayChkpt) ||
2383+
((type & DELAY_CHKPT_COMPLETE) && proc->delayChkptEnd)) &&
23522384
VirtualTransactionIdIsValid(vxid))
23532385
{
23542386
int i;
@@ -2371,6 +2403,28 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
23712403
return result;
23722404
}
23732405

2406+
/*
2407+
* HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying
2408+
* the start of a checkpoint?
2409+
*/
2410+
bool
2411+
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
2412+
{
2413+
return HaveVirtualXIDsDelayingChkptGuts(vxids, nvxids,
2414+
DELAY_CHKPT_START);
2415+
}
2416+
2417+
/*
2418+
* HaveVirtualXIDsDelayingChkptEnd -- Are any of the specified VXIDs delaying
2419+
* the end of a checkpoint?
2420+
*/
2421+
bool
2422+
HaveVirtualXIDsDelayingChkptEnd(VirtualTransactionId *vxids, int nvxids)
2423+
{
2424+
return HaveVirtualXIDsDelayingChkptGuts(vxids, nvxids,
2425+
DELAY_CHKPT_COMPLETE);
2426+
}
2427+
23742428
/*
23752429
* BackendPidGetProc -- get a backend's PGPROC given its PID
23762430
*

src/include/storage/proc.h

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -77,36 +77,8 @@ struct XidCache
7777
#define INVALID_PGPROCNO PG_INT32_MAX
7878

7979
/*
80-
* Flags for PGPROC.delayChkpt
81-
*
82-
* These flags can be used to delay the start or completion of a checkpoint
83-
* for short periods. A flag is in effect if the corresponding bit is set in
84-
* the PGPROC of any backend.
85-
*
86-
* For our purposes here, a checkpoint has three phases: (1) determine the
87-
* location to which the redo pointer will be moved, (2) write all the
88-
* data durably to disk, and (3) WAL-log the checkpoint.
89-
*
90-
* Setting DELAY_CHKPT_START prevents the system from moving from phase 1
91-
* to phase 2. This is useful when we are performing a WAL-logged modification
92-
* of data that will be flushed to disk in phase 2. By setting this flag
93-
* before writing WAL and clearing it after we've both written WAL and
94-
* performed the corresponding modification, we ensure that if the WAL record
95-
* is inserted prior to the new redo point, the corresponding data changes will
96-
* also be flushed to disk before the checkpoint can complete. (In the
97-
* extremely common case where the data being modified is in shared buffers
98-
* and we acquire an exclusive content lock on the relevant buffers before
99-
* writing WAL, this mechanism is not needed, because phase 2 will block
100-
* until we release the content lock and then flush the modified data to
101-
* disk.)
102-
*
103-
* Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
104-
* to phase 3. This is useful if we are performing a WAL-logged operation that
105-
* might invalidate buffers, such as relation truncation. In this case, we need
106-
* to ensure that any buffers which were invalidated and thus not flushed by
107-
* the checkpoint are actually destroyed on disk. Replay can cope with a file
108-
* or block that doesn't exist, but not with a block that has the wrong
109-
* contents.
80+
* Flags used only as the "type" argument of the internal functions
81+
* GetVirtualXIDsDelayingChkptGuts and HaveVirtualXIDsDelayingChkptGuts.
11082
*/
11183
#define DELAY_CHKPT_START (1<<0)
11284
#define DELAY_CHKPT_COMPLETE (1<<1)
@@ -185,6 +157,12 @@ struct PGPROC
185157
*/
186158
XLogRecPtr waitLSN; /* waiting for this LSN or higher */
187159
int syncRepState; /* wait state for sync rep */
160+
bool delayChkptEnd; /* true if this proc delays checkpoint end;
161+
* this doesn't have anything to do with
162+
* sync rep but we don't want to change
163+
* the size of PGPROC in released branches
164+
* and thus must fit this new field into
165+
* existing padding space */
188166
SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
189167

190168
/*
@@ -267,7 +245,7 @@ typedef struct PGXACT
267245

268246
uint8 vacuumFlags; /* vacuum-related flags, see above */
269247
bool overflowed;
270-
int delayChkpt; /* for DELAY_CHKPT_* flags */
248+
bool delayChkpt; /* true if this proc delays checkpoint start */
271249

272250
uint8 nxids;
273251
} PGXACT;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy