Content-Length: 902870 | pFad | http://github.com/postgres/postgres/commit/32ab0fd55da5b667a25e56bd74e258bb8eca6cf3

Add TAP tests to check replication slot advance during the checkpoint · postgres/postgres@32ab0fd · GitHub
Skip to content

Commit 32ab0fd

Browse files
committed
Add TAP tests to check replication slot advance during the checkpoint
The new tests verify that logical and physical replication slots are still valid after an immediate restart on checkpoint completion when the slot was advanced during the checkpoint. This commit introduces two new injection points to make these tests possible: * checkpoint-before-old-wal-removal - triggered in the checkpointer process just before old WAL segments cleanup; * logical-replication-slot-advance-segment - triggered in LogicalConfirmReceivedLocation() when restart_lsn was changed enough to point to the next WAL segment. Discussion: https://postgr.es/m/flat/1d12d2-67235980-35-19a406a0%4063439497 Author: Vitaly Davydov <v.davydov@postgrespro.ru> Author: Tomas Vondra <tomas@vondra.me> Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com> Reviewed-by: Amit Kapila <amit.kapila16@gmail.com> Backpatch-through: 17
1 parent 2090edc commit 32ab0fd

File tree

5 files changed

+296
-0
lines changed

5 files changed

+296
-0
lines changed

src/backend/access/transam/xlog.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7313,6 +7313,10 @@ CreateCheckPoint(int flags)
73137313
if (PriorRedoPtr != InvalidXLogRecPtr)
73147314
UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
73157315

7316+
#ifdef USE_INJECTION_POINTS
7317+
INJECTION_POINT("checkpoint-before-old-wal-removal");
7318+
#endif
7319+
73167320
/*
73177321
* Delete old log files, those no longer needed for last checkpoint to
73187322
* prevent the disk holding the xlog from growing full.

src/backend/replication/logical/logical.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "postgres.h"
3030

3131
#include "access/xact.h"
32+
#include "access/xlog_internal.h"
3233
#include "access/xlogutils.h"
3334
#include "fmgr.h"
3435
#include "miscadmin.h"
@@ -41,6 +42,7 @@
4142
#include "storage/proc.h"
4243
#include "storage/procarray.h"
4344
#include "utils/builtins.h"
45+
#include "utils/injection_point.h"
4446
#include "utils/inval.h"
4547
#include "utils/memutils.h"
4648

@@ -1844,9 +1846,13 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
18441846
{
18451847
bool updated_xmin = false;
18461848
bool updated_restart = false;
1849+
XLogRecPtr restart_lsn pg_attribute_unused();
18471850

18481851
SpinLockAcquire(&MyReplicationSlot->mutex);
18491852

1853+
/* remember the old restart lsn */
1854+
restart_lsn = MyReplicationSlot->data.restart_lsn;
1855+
18501856
/*
18511857
* Prevent moving the confirmed_flush backwards, as this could lead to
18521858
* data duplication issues caused by replicating already replicated
@@ -1908,6 +1914,18 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
19081914
*/
19091915
if (updated_xmin || updated_restart)
19101916
{
1917+
#ifdef USE_INJECTION_POINTS
1918+
XLogSegNo seg1,
1919+
seg2;
1920+
1921+
XLByteToSeg(restart_lsn, seg1, wal_segment_size);
1922+
XLByteToSeg(MyReplicationSlot->data.restart_lsn, seg2, wal_segment_size);
1923+
1924+
/* trigger injection point, but only if segment changes */
1925+
if (seg1 != seg2)
1926+
INJECTION_POINT("logical-replication-slot-advance-segment");
1927+
#endif
1928+
19111929
ReplicationSlotMarkDirty();
19121930
ReplicationSlotSave();
19131931
elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);

src/test/recovery/meson.build

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ tests += {
5353
't/042_low_level_backup.pl',
5454
't/043_no_contrecord_switch.pl',
5555
't/045_archive_restartpoint.pl',
56+
't/046_checkpoint_logical_slot.pl',
57+
't/047_checkpoint_physical_slot.pl'
5658
],
5759
},
5860
}
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
# Copyright (c) 2025, PostgreSQL Global Development Group
2+
#
3+
# This test verifies the case when the logical slot is advanced during
4+
# checkpoint. The test checks that the logical slot's restart_lsn still refers
5+
# to an existing WAL segment after immediate restart.
6+
#
7+
use strict;
8+
use warnings FATAL => 'all';
9+
10+
use PostgreSQL::Test::Cluster;
11+
use PostgreSQL::Test::Utils;
12+
13+
use Test::More;
14+
15+
if ($ENV{enable_injection_points} ne 'yes')
16+
{
17+
plan skip_all => 'Injection points not supported by this build';
18+
}
19+
20+
my ($node, $result);
21+
22+
$node = PostgreSQL::Test::Cluster->new('mike');
23+
$node->init;
24+
$node->append_conf('postgresql.conf',
25+
"shared_preload_libraries = 'injection_points'");
26+
$node->append_conf('postgresql.conf', "wal_level = 'logical'");
27+
$node->start;
28+
$node->safe_psql('postgres', q(CREATE EXTENSION injection_points));
29+
30+
# Create a simple table to generate data into.
31+
$node->safe_psql('postgres',
32+
q{create table t (id serial primary key, b text)});
33+
34+
# Create the two slots we'll need.
35+
$node->safe_psql('postgres',
36+
q{select pg_create_logical_replication_slot('slot_logical', 'test_decoding')}
37+
);
38+
$node->safe_psql('postgres',
39+
q{select pg_create_physical_replication_slot('slot_physical', true)});
40+
41+
# Advance both slots to the current position just to have everything "valid".
42+
$node->safe_psql('postgres',
43+
q{select count(*) from pg_logical_slot_get_changes('slot_logical', null, null)}
44+
);
45+
$node->safe_psql('postgres',
46+
q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())}
47+
);
48+
49+
# Run checkpoint to flush current state to disk and set a baseline.
50+
$node->safe_psql('postgres', q{checkpoint});
51+
52+
# Generate some transactions to get RUNNING_XACTS.
53+
my $xacts = $node->background_psql('postgres');
54+
$xacts->query_until(
55+
qr/run_xacts/,
56+
q(\echo run_xacts
57+
SELECT 1 \watch 0.1
58+
\q
59+
));
60+
61+
# Insert 1M rows, enough to generate multiple segments' worth of WAL.
62+
$node->safe_psql('postgres',
63+
q{insert into t (b) select md5(i::text) from generate_series(1,1000000) s(i)}
64+
);
65+
66+
# Run another checkpoint to set a new restore LSN.
67+
$node->safe_psql('postgres', q{checkpoint});
68+
69+
# Another 1M rows, again enough to generate multiple segments' worth of WAL.
70+
$node->safe_psql('postgres',
71+
q{insert into t (b) select md5(i::text) from generate_series(1,1000000) s(i)}
72+
);
73+
74+
# Run another checkpoint, this time in the background, and make it wait
75+
# on the injection point so that the checkpoint stops right before
76+
# removing old WAL segments.
77+
# No trailing \n: single-quoted strings would emit it as a literal backslash-n.
note('starting checkpoint');
78+
79+
my $checkpoint = $node->background_psql('postgres');
80+
$checkpoint->query_safe(
81+
q(select injection_points_attach('checkpoint-before-old-wal-removal','wait'))
82+
);
83+
$checkpoint->query_until(
84+
qr/starting_checkpoint/,
85+
q(\echo starting_checkpoint
86+
checkpoint;
87+
\q
88+
));
89+
90+
# Wait until the checkpoint stops right before removing WAL segments.
91+
# No trailing \n: single-quoted strings would emit it as a literal backslash-n.
note('waiting for injection_point');
92+
$node->wait_for_event('checkpointer', 'checkpoint-before-old-wal-removal');
93+
note('injection_point is reached');
94+
95+
# Try to advance the logical slot, but make it stop when it moves to the next
96+
# WAL segment (this has to happen in the background, too).
97+
my $logical = $node->background_psql('postgres');
98+
$logical->query_safe(
99+
q{select injection_points_attach('logical-replication-slot-advance-segment','wait');}
100+
);
101+
$logical->query_until(
102+
qr/get_changes/,
103+
q(
104+
\echo get_changes
105+
select count(*) from pg_logical_slot_get_changes('slot_logical', null, null) \watch 1
106+
\q
107+
));
108+
109+
# Wait until the slot's restart_lsn points to the next WAL segment.
110+
# No trailing \n: single-quoted strings would emit it as a literal backslash-n.
note('waiting for injection_point');
111+
$node->wait_for_event('client backend',
112+
'logical-replication-slot-advance-segment');
113+
note('injection_point is reached');
114+
115+
# OK, we're in the right situation: time to advance the physical slot, which
116+
# recalculates the required LSN, and then unblock the checkpoint, which
117+
# removes the WAL still needed by the logical slot.
118+
$node->safe_psql('postgres',
119+
q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())}
120+
);
121+
122+
# Continue the checkpoint.
123+
$node->safe_psql('postgres',
124+
q{select injection_points_wakeup('checkpoint-before-old-wal-removal')});
125+
126+
# Abruptly stop the server (1 second should be enough for the checkpoint
127+
# to finish; explicitly waiting for it to complete would be better).
128+
$node->stop('immediate');
129+
130+
$node->start;
131+
132+
eval {
133+
$node->safe_psql('postgres',
134+
q{select count(*) from pg_logical_slot_get_changes('slot_logical', null, null);}
135+
);
136+
};
137+
is($@, '', "Logical slot still valid");
138+
139+
done_testing();
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# Copyright (c) 2025, PostgreSQL Global Development Group
2+
#
3+
# This test verifies the case when the physical slot is advanced during
4+
# checkpoint. The test checks that the physical slot's restart_lsn still refers
5+
# to an existing WAL segment after immediate restart.
6+
#
7+
use strict;
8+
use warnings FATAL => 'all';
9+
10+
use PostgreSQL::Test::Cluster;
11+
use PostgreSQL::Test::Utils;
12+
13+
use Test::More;
14+
15+
if ($ENV{enable_injection_points} ne 'yes')
16+
{
17+
plan skip_all => 'Injection points not supported by this build';
18+
}
19+
20+
my ($node, $result);
21+
22+
$node = PostgreSQL::Test::Cluster->new('mike');
23+
$node->init;
24+
$node->append_conf('postgresql.conf',
25+
"shared_preload_libraries = 'injection_points'");
26+
$node->append_conf('postgresql.conf', "wal_level = 'replica'");
27+
$node->start;
28+
$node->safe_psql('postgres', q(CREATE EXTENSION injection_points));
29+
30+
# Create a simple table to generate data into.
31+
$node->safe_psql('postgres',
32+
q{create table t (id serial primary key, b text)});
33+
34+
# Create a physical replication slot.
35+
$node->safe_psql('postgres',
36+
q{select pg_create_physical_replication_slot('slot_physical', true)});
37+
38+
# Advance slot to the current position, just to have everything "valid".
39+
$node->safe_psql('postgres',
40+
q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())}
41+
);
42+
43+
# Run checkpoint to flush current state to disk and set a baseline.
44+
$node->safe_psql('postgres', q{checkpoint});
45+
46+
# Insert 100K rows to generate some WAL.
47+
$node->safe_psql('postgres',
48+
q{insert into t (b) select md5(i::text) from generate_series(1,100000) s(i)}
49+
);
50+
51+
# Advance slot to the current position, just to have everything "valid".
52+
$node->safe_psql('postgres',
53+
q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())}
54+
);
55+
56+
# Run another checkpoint to set a new restore LSN.
57+
$node->safe_psql('postgres', q{checkpoint});
58+
59+
# Another 1M rows, enough to generate multiple segments' worth of WAL.
60+
$node->safe_psql('postgres',
61+
q{insert into t (b) select md5(i::text) from generate_series(1,1000000) s(i)}
62+
);
63+
64+
my $restart_lsn_init = $node->safe_psql('postgres',
65+
q{select restart_lsn from pg_replication_slots where slot_name = 'slot_physical'}
66+
);
67+
chomp($restart_lsn_init);
68+
note("restart lsn before checkpoint: $restart_lsn_init");
69+
70+
# Run another checkpoint, this time in the background, and make it wait
71+
# on the injection point so that the checkpoint stops right before
72+
# removing old WAL segments.
73+
note('starting checkpoint');
74+
75+
my $checkpoint = $node->background_psql('postgres');
76+
$checkpoint->query_safe(
77+
q{select injection_points_attach('checkpoint-before-old-wal-removal','wait')}
78+
);
79+
$checkpoint->query_until(
80+
qr/starting_checkpoint/,
81+
q(\echo starting_checkpoint
82+
checkpoint;
83+
\q
84+
));
85+
86+
# Wait until the checkpoint stops right before removing WAL segments.
87+
note('waiting for injection_point');
88+
$node->wait_for_event('checkpointer', 'checkpoint-before-old-wal-removal');
89+
note('injection_point is reached');
90+
91+
# OK, we're in the right situation: time to advance the physical slot, which
92+
# recalculates the required LSN and then unblock the checkpoint, which
93+
# removes the WAL still needed by the physical slot.
94+
$node->safe_psql('postgres',
95+
q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())}
96+
);
97+
98+
# Continue the checkpoint.
99+
$node->safe_psql('postgres',
100+
q{select injection_points_wakeup('checkpoint-before-old-wal-removal')});
101+
102+
my $restart_lsn_old = $node->safe_psql('postgres',
103+
q{select restart_lsn from pg_replication_slots where slot_name = 'slot_physical'}
104+
);
105+
chomp($restart_lsn_old);
106+
note("restart lsn before stop: $restart_lsn_old");
107+
108+
# Abruptly stop the server (1 second should be enough for the checkpoint
109+
# to finish; explicitly waiting for it to complete would be better).
110+
$node->stop('immediate');
111+
112+
$node->start;
113+
114+
# Get the restart_lsn of the slot right after restarting.
115+
my $restart_lsn = $node->safe_psql('postgres',
116+
q{select restart_lsn from pg_replication_slots where slot_name = 'slot_physical'}
117+
);
118+
chomp($restart_lsn);
119+
note("restart lsn: $restart_lsn");
120+
121+
# Get the WAL segment name for the slot's restart_lsn.
122+
my $restart_lsn_segment = $node->safe_psql('postgres',
123+
"SELECT pg_walfile_name('$restart_lsn'::pg_lsn)");
124+
chomp($restart_lsn_segment);
125+
126+
# Check if the required wal segment exists.
127+
note("required by slot segment name: $restart_lsn_segment");
128+
my $datadir = $node->data_dir;
129+
ok( -f "$datadir/pg_wal/$restart_lsn_segment",
130+
"WAL segment $restart_lsn_segment for physical slot's restart_lsn $restart_lsn exists"
131+
);
132+
133+
done_testing();

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/postgres/postgres/commit/32ab0fd55da5b667a25e56bd74e258bb8eca6cf3

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy