Content-Length: 577662 | pFad | http://github.com/postgres/postgres/commit/e2832bd961103a17e281919de7151f80f518cf24

Keep WAL segments by the flushed value of the slot's restart LSN · postgres/postgres@e2832bd · GitHub
Skip to content

Commit e2832bd

Browse files
committed
Keep WAL segments by the flushed value of the slot's restart LSN
The patch fixes the issue with the unexpected removal of old WAL segments after checkpoint, followed by an immediate restart. The issue occurs when a slot is advanced after the start of the checkpoint and before old WAL segments are removed at the end of the checkpoint.

The idea of the patch is to get the minimal restart_lsn at the beginning of checkpoint (or restart point) creation and use this value when calculating the oldest LSN for WAL segments removal at the end of checkpoint. This idea was proposed by Tomas Vondra in the discussion. Unlike 291221c46575, this fix doesn't affect ABI and is intended for back branches.

Discussion: https://postgr.es/m/flat/1d12d2-67235980-35-19a406a0%4063439497
Author: Vitaly Davydov <v.davydov@postgrespro.ru>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Backpatch-through: 13
1 parent 7c7c0a7 commit e2832bd

File tree

3 files changed

+60
-9
lines changed

3 files changed

+60
-9
lines changed

src/backend/access/transam/xlog.c

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,8 @@ static void LocalSetXLogInsertAllowed(void);
930930
static void CreateEndOfRecoveryRecord(void);
931931
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn);
932932
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
933-
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo);
933+
static void KeepLogSeg(XLogRecPtr recptr, XLogRecPtr slotsMinLSN,
934+
XLogSegNo *logSegNo);
934935
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void);
935936

936937
static void AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic);
@@ -9122,6 +9123,7 @@ CreateCheckPoint(int flags)
91229123
XLogRecPtr last_important_lsn;
91239124
VirtualTransactionId *vxids;
91249125
int nvxids;
9126+
XLogRecPtr slotsMinReqLSN;
91259127

91269128
/*
91279129
* An end-of-recovery checkpoint is really a shutdown checkpoint, just
@@ -9335,6 +9337,15 @@ CreateCheckPoint(int flags)
93359337
*/
93369338
END_CRIT_SECTION();
93379339

9340+
/*
9341+
* Get the current minimum LSN to be used later in the WAL segment
9342+
* cleanup. We may clean up only WAL segments, which are not needed
9343+
* according to synchronized LSNs of replication slots. The slot's LSN
9344+
* might be advanced concurrently, so we call this before
9345+
* CheckPointReplicationSlots() synchronizes replication slots.
9346+
*/
9347+
slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
9348+
93389349
/*
93399350
* In some cases there are groups of actions that must all occur on one
93409351
* side or the other of a checkpoint record. Before flushing the
@@ -9499,15 +9510,23 @@ CreateCheckPoint(int flags)
94999510
* prevent the disk holding the xlog from growing full.
95009511
*/
95019512
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
9502-
KeepLogSeg(recptr, &_logSegNo);
9513+
KeepLogSeg(recptr, slotsMinReqLSN, &_logSegNo);
95039514
if (InvalidateObsoleteReplicationSlots(_logSegNo))
95049515
{
9516+
/*
9517+
* Recalculate the current minimum LSN to be used in the WAL segment
9518+
* cleanup. Then, we must synchronize the replication slots again in
9519+
* order to make this LSN safe to use.
9520+
*/
9521+
slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
9522+
CheckPointReplicationSlots();
9523+
95059524
/*
95069525
* Some slots have been invalidated; recalculate the old-segment
95079526
* horizon, starting again from RedoRecPtr.
95089527
*/
95099528
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
9510-
KeepLogSeg(recptr, &_logSegNo);
9529+
KeepLogSeg(recptr, slotsMinReqLSN, &_logSegNo);
95119530
}
95129531
_logSegNo--;
95139532
RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr);
@@ -9740,6 +9759,7 @@ CreateRestartPoint(int flags)
97409759
XLogRecPtr endptr;
97419760
XLogSegNo _logSegNo;
97429761
TimestampTz xtime;
9762+
XLogRecPtr slotsMinReqLSN;
97439763

97449764
/* Get a local copy of the last safe checkpoint record. */
97459765
SpinLockAcquire(&XLogCtl->info_lck);
@@ -9820,6 +9840,15 @@ CreateRestartPoint(int flags)
98209840
MemSet(&CheckpointStats, 0, sizeof(CheckpointStats));
98219841
CheckpointStats.ckpt_start_t = GetCurrentTimestamp();
98229842

9843+
/*
9844+
* Get the current minimum LSN to be used later in the WAL segment
9845+
* cleanup. We may clean up only WAL segments, which are not needed
9846+
* according to synchronized LSNs of replication slots. The slot's LSN
9847+
* might be advanced concurrently, so we call this before
9848+
* CheckPointReplicationSlots() synchronizes replication slots.
9849+
*/
9850+
slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
9851+
98239852
if (log_checkpoints)
98249853
LogCheckpointStart(flags, true);
98259854

@@ -9908,15 +9937,23 @@ CreateRestartPoint(int flags)
99089937
receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
99099938
replayPtr = GetXLogReplayRecPtr(&replayTLI);
99109939
endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
9911-
KeepLogSeg(endptr, &_logSegNo);
9940+
KeepLogSeg(endptr, slotsMinReqLSN, &_logSegNo);
99129941
if (InvalidateObsoleteReplicationSlots(_logSegNo))
99139942
{
9943+
/*
9944+
* Recalculate the current minimum LSN to be used in the WAL segment
9945+
* cleanup. Then, we must synchronize the replication slots again in
9946+
* order to make this LSN safe to use.
9947+
*/
9948+
slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
9949+
CheckPointReplicationSlots();
9950+
99149951
/*
99159952
* Some slots have been invalidated; recalculate the old-segment
99169953
* horizon, starting again from RedoRecPtr.
99179954
*/
99189955
XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size);
9919-
KeepLogSeg(endptr, &_logSegNo);
9956+
KeepLogSeg(endptr, slotsMinReqLSN, &_logSegNo);
99209957
}
99219958
_logSegNo--;
99229959

@@ -10019,6 +10056,7 @@ GetWALAvailability(XLogRecPtr targetLSN)
1001910056
XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
1002010057
XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
1002110058
uint64 keepSegs;
10059+
XLogRecPtr slotsMinReqLSN;
1002210060

1002310061
/*
1002410062
* slot does not reserve WAL. Either deactivated, or has never been active
@@ -10032,8 +10070,9 @@ GetWALAvailability(XLogRecPtr targetLSN)
1003210070
* oldestSlotSeg to the current segment.
1003310071
*/
1003410072
currpos = GetXLogWriteRecPtr();
10073+
slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN();
1003510074
XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
10036-
KeepLogSeg(currpos, &oldestSlotSeg);
10075+
KeepLogSeg(currpos, slotsMinReqLSN, &oldestSlotSeg);
1003710076

1003810077
/*
1003910078
* Find the oldest extant segment file. We get 1 until checkpoint removes
@@ -10094,7 +10133,7 @@ GetWALAvailability(XLogRecPtr targetLSN)
1009410133
* invalidation is optionally done here, instead.
1009510134
*/
1009610135
static void
10097-
KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
10136+
KeepLogSeg(XLogRecPtr recptr, XLogRecPtr slotsMinReqLSN, XLogSegNo *logSegNo)
1009810137
{
1009910138
XLogSegNo currSegNo;
1010010139
XLogSegNo segno;
@@ -10107,7 +10146,7 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
1010710146
* Calculate how many segments are kept by slots first, adjusting for
1010810147
* max_slot_wal_keep_size.
1010910148
*/
10110-
keep = XLogGetReplicationSlotMinimumLSN();
10149+
keep = slotsMinReqLSN;
1011110150
if (keep != InvalidXLogRecPtr && keep < recptr)
1011210151
{
1011310152
XLByteToSeg(keep, segno, wal_segment_size);

src/backend/replication/logical/logical.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1768,7 +1768,15 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
17681768

17691769
SpinLockRelease(&MyReplicationSlot->mutex);
17701770

1771-
/* first write new xmin to disk, so we know what's up after a crash */
1771+
/*
1772+
* First, write new xmin and restart_lsn to disk so we know what's up
1773+
* after a crash. Even when we do this, the checkpointer can see the
1774+
* updated restart_lsn value in the shared memory; then, a crash can
1775+
* happen before we manage to write that value to the disk. Thus,
1776+
* checkpointer still needs to make special efforts to keep WAL
1777+
* segments required by the restart_lsn written to the disk. See
1778+
* CreateCheckPoint() and CreateRestartPoint() for details.
1779+
*/
17721780
if (updated_xmin || updated_restart)
17731781
{
17741782
ReplicationSlotMarkDirty();

src/backend/replication/walsender.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1906,6 +1906,10 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
19061906
* be energy wasted - the worst lost information can do here is give us
19071907
* wrong information in a statistics view - we'll just potentially be more
19081908
* conservative in removing files.
1909+
*
1910+
* Checkpointer makes special efforts to keep the WAL segments required by
1911+
* the restart_lsn written to the disk. See CreateCheckPoint() and
1912+
* CreateRestartPoint() for details.
19091913
*/
19101914
}
19111915

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/postgres/postgres/commit/e2832bd961103a17e281919de7151f80f518cf24

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy