Skip to content

Commit c54b589

Browse files
committed
Morph pg_replication_slots.min_safe_lsn to safe_wal_size
The previous definition of the column was almost universally disliked, so provide this updated definition, which is more useful for monitoring purposes: a large positive value is good, while zero or a negative value means danger. This should be operationally more convenient.

Backpatch to 13, where the new column in pg_replication_slots (and the feature it represents) was added.

Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Author: Álvaro Herrera <alvherre@alvh.no-ip.org>
Reported-by: Fujii Masao <masao.fujii@oss.nttdata.com>
Discussion: https://postgr.es/m/9ddfbf8c-2f67-904d-44ed-cf8bc5916228@oss.nttdata.com
1 parent da6b6ff commit c54b589

File tree

9 files changed

+60
-34
lines changed

9 files changed

+60
-34
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11275,10 +11275,13 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
1127511275

1127611276
<row>
1127711277
<entry role="catalog_table_entry"><para role="column_definition">
11278-
<structfield>min_safe_lsn</structfield> <type>pg_lsn</type>
11278+
<structfield>safe_wal_size</structfield> <type>int8</type>
1127911279
</para>
1128011280
<para>
11281-
The minimum LSN currently available for walsenders.
11281+
The number of bytes that can be written to WAL such that this slot
11282+
is not in danger of getting into the "lost" state. It is NULL for lost
11283+
slots, as well as if <varname>max_slot_wal_keep_size</varname>
11284+
is <literal>-1</literal>.
1128211285
</para></entry>
1128311286
</row>
1128411287
</tbody>

src/backend/access/transam/xlog.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -764,8 +764,7 @@ static ControlFileData *ControlFile = NULL;
764764
* Convert values of GUCs measured in megabytes to equiv. segment count.
765765
* Rounds down.
766766
*/
767-
#define ConvertToXSegs(x, segsize) \
768-
((x) / ((segsize) / (1024 * 1024)))
767+
#define ConvertToXSegs(x, segsize) XLogMBVarToSegs((x), (segsize))
769768

770769
/* The number of bytes in a WAL segment usable for WAL data. */
771770
static int UsableBytesInSegment;
@@ -9513,8 +9512,7 @@ GetWALAvailability(XLogRecPtr targetLSN)
95139512
XLogSegNo targetSeg; /* segid of targetLSN */
95149513
XLogSegNo oldestSeg; /* actual oldest segid */
95159514
XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
9516-
XLogSegNo oldestSlotSeg = InvalidXLogRecPtr; /* oldest segid kept by
9517-
* slot */
9515+
XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
95189516
uint64 keepSegs;
95199517

95209518
/*

src/backend/catalog/system_views.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -879,7 +879,7 @@ CREATE VIEW pg_replication_slots AS
879879
L.restart_lsn,
880880
L.confirmed_flush_lsn,
881881
L.wal_status,
882-
L.min_safe_lsn
882+
L.safe_wal_size
883883
FROM pg_get_replication_slots() AS L
884884
LEFT JOIN pg_database D ON (L.datoid = D.oid);
885885

src/backend/replication/slotfuncs.c

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
242242
Tuplestorestate *tupstore;
243243
MemoryContext per_query_ctx;
244244
MemoryContext oldcontext;
245+
XLogRecPtr currlsn;
245246
int slotno;
246247

247248
/* check to see if caller supports us returning a tuplestore */
@@ -274,6 +275,8 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
274275

275276
MemoryContextSwitchTo(oldcontext);
276277

278+
currlsn = GetXLogWriteRecPtr();
279+
277280
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
278281
for (slotno = 0; slotno < max_replication_slots; slotno++)
279282
{
@@ -282,7 +285,6 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
282285
Datum values[PG_GET_REPLICATION_SLOTS_COLS];
283286
bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
284287
WALAvailability walstate;
285-
XLogSegNo last_removed_seg;
286288
int i;
287289

288290
if (!slot->in_use)
@@ -380,36 +382,54 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
380382
* we looked. If checkpointer signalled the process to
381383
* termination, then it's definitely lost; but if a process is
382384
* still alive, then "unreserved" seems more appropriate.
385+
*
386+
* If we do change it, save the state for safe_wal_size below.
383387
*/
384388
if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
385389
{
386390
int pid;
387391

388392
SpinLockAcquire(&slot->mutex);
389393
pid = slot->active_pid;
394+
slot_contents.data.restart_lsn = slot->data.restart_lsn;
390395
SpinLockRelease(&slot->mutex);
391396
if (pid != 0)
392397
{
393398
values[i++] = CStringGetTextDatum("unreserved");
399+
walstate = WALAVAIL_UNRESERVED;
394400
break;
395401
}
396402
}
397403
values[i++] = CStringGetTextDatum("lost");
398404
break;
399405
}
400406

401-
if (max_slot_wal_keep_size_mb >= 0 &&
402-
(walstate == WALAVAIL_RESERVED || walstate == WALAVAIL_EXTENDED) &&
403-
((last_removed_seg = XLogGetLastRemovedSegno()) != 0))
407+
/*
408+
* safe_wal_size is only computed for slots that have not been lost,
409+
* and only if there's a configured maximum size.
410+
*/
411+
if (walstate == WALAVAIL_REMOVED || max_slot_wal_keep_size_mb < 0)
412+
nulls[i++] = true;
413+
else
404414
{
405-
XLogRecPtr min_safe_lsn;
415+
XLogSegNo targetSeg;
416+
XLogSegNo keepSegs;
417+
XLogSegNo failSeg;
418+
XLogRecPtr failLSN;
406419

407-
XLogSegNoOffsetToRecPtr(last_removed_seg + 1, 0,
408-
wal_segment_size, min_safe_lsn);
409-
values[i++] = Int64GetDatum(min_safe_lsn);
420+
XLByteToSeg(slot_contents.data.restart_lsn, targetSeg, wal_segment_size);
421+
422+
/* determine how many segments can be kept by slots ... */
423+
keepSegs = XLogMBVarToSegs(max_slot_wal_keep_size_mb, wal_segment_size);
424+
/* ... and override by wal_keep_segments as needed */
425+
keepSegs = Max(keepSegs, wal_keep_segments);
426+
427+
/* if currlsn reaches failLSN, we lose our segment */
428+
failSeg = targetSeg + keepSegs + 1;
429+
XLogSegNoOffsetToRecPtr(failSeg, 0, wal_segment_size, failLSN);
430+
431+
values[i++] = Int64GetDatum(failLSN - currlsn);
410432
}
411-
else
412-
nulls[i++] = true;
413433

414434
Assert(i == PG_GET_REPLICATION_SLOTS_COLS);
415435

src/include/access/xlog_internal.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,13 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
121121
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes) \
122122
logSegNo = ((xlrp) - 1) / (wal_segsz_bytes)
123123

124+
/*
125+
* Convert values of GUCs measured in megabytes to equiv. segment count.
126+
* Rounds down.
127+
*/
128+
#define XLogMBVarToSegs(mbvar, wal_segsz_bytes) \
129+
((mbvar) / ((wal_segsz_bytes) / (1024 * 1024)))
130+
124131
/*
125132
* Is an XLogRecPtr within a particular XLOG segment?
126133
*

src/include/catalog/catversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 202005171
56+
#define CATALOG_VERSION_NO 202007071
5757

5858
#endif

src/include/catalog/pg_proc.dat

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10063,9 +10063,9 @@
1006310063
proname => 'pg_get_replication_slots', prorows => '10', proisstrict => 'f',
1006410064
proretset => 't', provolatile => 's', prorettype => 'record',
1006510065
proargtypes => '',
10066-
proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,pg_lsn}',
10066+
proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,int8}',
1006710067
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o}',
10068-
proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,min_safe_lsn}',
10068+
proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,safe_wal_size}',
1006910069
prosrc => 'pg_get_replication_slots' },
1007010070
{ oid => '3786', descr => 'set up a logical replication slot',
1007110071
proname => 'pg_create_logical_replication_slot', provolatile => 'v',

src/test/recovery/t/019_replslot_limit.pl

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
# The slot state and safe_wal_size should be null before the first connection
3030
my $result = $node_master->safe_psql('postgres',
31-
"SELECT restart_lsn IS NULL, wal_status is NULL, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
31+
"SELECT restart_lsn IS NULL, wal_status is NULL, safe_wal_size is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
3232
);
3333
is($result, "t|t|t", 'check the state of non-reserved slot is "unknown"');
3434

@@ -52,9 +52,9 @@
5252
# Stop standby
5353
$node_standby->stop;
5454

55-
# Preparation done, the slot is the state "normal" now
55+
# Preparation done, the slot is in the state "reserved" now
5656
$result = $node_master->safe_psql('postgres',
57-
"SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
57+
"SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
5858
);
5959
is($result, "reserved|t", 'check the catching-up state');
6060

@@ -64,7 +64,7 @@
6464

6565
# The slot is always "safe" when fitting max_wal_size
6666
$result = $node_master->safe_psql('postgres',
67-
"SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
67+
"SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
6868
);
6969
is($result, "reserved|t",
7070
'check that it is safe if WAL fits in max_wal_size');
@@ -74,7 +74,7 @@
7474

7575
# The slot is always "safe" when max_slot_wal_keep_size is not set
7676
$result = $node_master->safe_psql('postgres',
77-
"SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
77+
"SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
7878
);
7979
is($result, "reserved|t", 'check that slot is working');
8080

@@ -94,9 +94,7 @@
9494
));
9595
$node_master->reload;
9696

97-
# The slot is in safe state. The distance from the min_safe_lsn should
98-
# be as almost (max_slot_wal_keep_size - 1) times large as the segment
99-
# size
97+
# The slot is in safe state.
10098

10199
$result = $node_master->safe_psql('postgres',
102100
"SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
@@ -110,7 +108,7 @@
110108
$result = $node_master->safe_psql('postgres',
111109
"SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
112110
is($result, "reserved",
113-
'check that min_safe_lsn gets close to the current LSN');
111+
'check that safe_wal_size gets close to the current LSN');
114112

115113
# The standby can reconnect to master
116114
$node_standby->start;
@@ -152,9 +150,9 @@
152150
# Advance WAL again without checkpoint; remain goes to 0.
153151
advance_wal($node_master, 1);
154152

155-
# Slot gets into 'unreserved' state
153+
# Slot gets into 'unreserved' state and safe_wal_size is zero or negative
156154
$result = $node_master->safe_psql('postgres',
157-
"SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
155+
"SELECT wal_status, safe_wal_size <= 0 FROM pg_replication_slots WHERE slot_name = 'rep1'"
158156
);
159157
is($result, "unreserved|t",
160158
'check that the slot state changes to "unreserved"');
@@ -186,7 +184,7 @@
186184

187185
# This slot should be broken
188186
$result = $node_master->safe_psql('postgres',
189-
"SELECT slot_name, active, restart_lsn IS NULL, wal_status, min_safe_lsn FROM pg_replication_slots WHERE slot_name = 'rep1'"
187+
"SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size FROM pg_replication_slots WHERE slot_name = 'rep1'"
190188
);
191189
is($result, "rep1|f|t|lost|",
192190
'check that the slot became inactive and the state "lost" persists');

src/test/regress/expected/rules.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,8 +1464,8 @@ pg_replication_slots| SELECT l.slot_name,
14641464
l.restart_lsn,
14651465
l.confirmed_flush_lsn,
14661466
l.wal_status,
1467-
l.min_safe_lsn
1468-
FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn, wal_status, min_safe_lsn)
1467+
l.safe_wal_size
1468+
FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn, wal_status, safe_wal_size)
14691469
LEFT JOIN pg_database d ON ((l.datoid = d.oid)));
14701470
pg_roles| SELECT pg_authid.rolname,
14711471
pg_authid.rolsuper,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy