Content-Length: 340019 | pFad | https://github.com/postgrespro/postgres/commit/f28bf667f602f6ff36c219eb40c5f61de4440ae5

Add retries for further investigation of 019_replslot_limit.pl failures. · postgrespro/postgres@f28bf66 · GitHub
Skip to content

Commit f28bf66

Browse files
committed
Add retries for further investigation of 019_replslot_limit.pl failures.
Tom noticed evidence in the buildfarm suggesting the failures might just be really slow process exits. To investigate further, instead of giving up after seeing multiple walsender pids once, retry. For now continue to report test failure if a retry succeeds. See also commits afdeff1 and fe0972e. Per suggestion from Tom Lane. Discussion: https://postgr.es/m/3042597.1648148740@sss.pgh.pa.us
1 parent 26ebb0e commit f28bf66

File tree

1 file changed

+29
-11
lines changed

1 file changed

+29
-11
lines changed

src/test/recovery/t/019_replslot_limit.pl

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -333,23 +333,41 @@
333333
$node_standby3->append_conf('postgresql.conf', "primary_slot_name = 'rep3'");
334334
$node_standby3->start;
335335
$node_primary3->wait_for_catchup($node_standby3);
336-
my $senderpid = $node_primary3->safe_psql('postgres',
337-
"SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'");
338-
339-
# We've seen occasional cases where multiple walsender pids are active. An
340-
# immediate shutdown may hide evidence of a locking bug. So if multiple
341-
# walsenders are observed, shut down in fast mode, and collect some more
342-
# information.
343-
if (not like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid"))
336+
337+
my $senderpid;
338+
339+
# We've seen occasional cases where multiple walsender pids are active. It
340+
# could be that we're just observing process shutdown being slow. To collect
341+
# more information, retry a couple of times, printing a bit of debugging information
342+
# each iteration. For now report a test failure even if later iterations
343+
# succeed.
344+
my $i = 0;
345+
while (1)
344346
{
345347
my ($stdout, $stderr);
348+
349+
$senderpid = $node_primary3->safe_psql('postgres',
350+
"SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'");
351+
352+
last if like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid");
353+
354+
# show information about all active connections
346355
$node_primary3->psql('postgres',
347356
"\\a\\t\nSELECT * FROM pg_stat_activity",
348357
stdout => \$stdout, stderr => \$stderr);
349358
diag $stdout, $stderr;
350-
$node_primary3->stop('fast');
351-
$node_standby3->stop('fast');
352-
die "could not determine walsender pid, can't continue";
359+
360+
# unlikely that the problem would resolve after 15s, so give up at that point
361+
if ($i++ == 150)
362+
{
363+
# An immediate shutdown may hide evidence of a locking bug. If
364+
# retrying didn't resolve the issue, shut down in fast mode.
365+
$node_primary3->stop('fast');
366+
$node_standby3->stop('fast');
367+
die "could not determine walsender pid, can't continue";
368+
}
369+
370+
usleep(100_000);
353371
}
354372

355373
my $receiverpid = $node_standby3->safe_psql('postgres',

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: https://github.com/postgrespro/postgres/commit/f28bf667f602f6ff36c219eb40c5f61de4440ae5

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy