Skip to content

Commit bb86141

Browse files
committed
Kill dead-end children when there's nothing else left
Previously, the postmaster would never try to kill dead-end child processes, even if there were no other processes left. A dead-end backend will eventually exit when authentication_timeout expires, but if a dead-end backend is the only thing preventing the server from shutting down, it seems better to kill it immediately. This is particularly important if a bug in the early startup code prevents a dead-end child from timing out and exiting normally. Includes a test for the case where a dead-end backend previously prevented the server from shutting down. Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://www.postgresql.org/message-id/a102f15f-eac4-4ff2-af02-f9ff209ec66f@iki.fi
1 parent 18d67a8 commit bb86141

File tree

4 files changed

+116
-10
lines changed

4 files changed

+116
-10
lines changed

src/backend/postmaster/postmaster.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2985,10 +2985,11 @@ PostmasterStateMachine(void)
29852985
if (Shutdown >= ImmediateShutdown || FatalError)
29862986
{
29872987
/*
2988-
* Start waiting for dead_end children to die. This state
2989-
* change causes ServerLoop to stop creating new ones.
2988+
* Stop any dead_end children and stop creating new ones.
29902989
*/
29912990
pmState = PM_WAIT_DEAD_END;
2991+
ConfigurePostmasterWaitSet(false);
2992+
SignalChildren(SIGQUIT, btmask(B_DEAD_END_BACKEND));
29922993

29932994
/*
29942995
* We already SIGQUIT'd the archiver and stats processes, if
@@ -3027,9 +3028,10 @@ PostmasterStateMachine(void)
30273028
*/
30283029
FatalError = true;
30293030
pmState = PM_WAIT_DEAD_END;
3031+
ConfigurePostmasterWaitSet(false);
30303032

30313033
/* Kill the walsenders and archiver too */
3032-
SignalChildren(SIGQUIT, btmask_all_except(B_DEAD_END_BACKEND));
3034+
SignalChildren(SIGQUIT, BTYPE_MASK_ALL);
30333035
if (PgArchPID != 0)
30343036
signal_child(PgArchPID, SIGQUIT);
30353037
}
@@ -3048,14 +3050,13 @@ PostmasterStateMachine(void)
30483050
if (PgArchPID == 0 && CountChildren(btmask_all_except(B_DEAD_END_BACKEND)) == 0)
30493051
{
30503052
pmState = PM_WAIT_DEAD_END;
3053+
ConfigurePostmasterWaitSet(false);
3054+
SignalChildren(SIGTERM, BTYPE_MASK_ALL);
30513055
}
30523056
}
30533057

30543058
if (pmState == PM_WAIT_DEAD_END)
30553059
{
3056-
/* Don't allow any new socket connection events. */
3057-
ConfigurePostmasterWaitSet(false);
3058-
30593060
/*
30603061
* PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
30613062
* (ie, no dead_end children remain), and the archiver is gone too.
@@ -3381,12 +3382,12 @@ SignalChildren(int signal, BackendTypeMask targetMask)
33813382

33823383
/*
33833384
* Send a termination signal to children. This considers all of our children
3384-
* processes, except syslogger and dead_end backends.
3385+
* processes, except syslogger.
33853386
*/
33863387
static void
33873388
TerminateChildren(int signal)
33883389
{
3389-
SignalChildren(signal, btmask_all_except(B_DEAD_END_BACKEND));
3390+
SignalChildren(signal, BTYPE_MASK_ALL);
33903391
if (StartupPID != 0)
33913392
{
33923393
signal_child(StartupPID, signal);

src/test/perl/PostgreSQL/Test/Cluster.pm

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,9 @@ this to fail. Otherwise, tests might fail to detect server crashes.
11941194
With optional extra param fail_ok => 1, returns 0 for failure
11951195
instead of bailing out.
11961196
1197+
The optional extra param timeout can be used to pass the pg_ctl
1198+
--timeout option.
1199+
11971200
=cut
11981201

11991202
sub stop
@@ -1209,8 +1212,11 @@ sub stop
12091212
return 1 unless defined $self->{_pid};
12101213

12111214
print "### Stopping node \"$name\" using mode $mode\n";
1212-
$ret = PostgreSQL::Test::Utils::system_log('pg_ctl', '-D', $pgdata,
1213-
'-m', $mode, 'stop');
1215+
my @cmd = ('pg_ctl', '-D', $pgdata, '-m', $mode, 'stop');
1216+
if ($params{timeout}) {
1217+
push(@cmd, ('--timeout', $params{timeout}));
1218+
}
1219+
$ret = PostgreSQL::Test::Utils::system_log(@cmd);
12141220

12151221
if ($ret != 0)
12161222
{

src/test/postmaster/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ tests += {
77
'tap': {
88
'tests': [
99
't/001_connection_limits.pl',
10+
't/002_start_stop.pl',
1011
],
1112
},
1213
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
2+
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
3+
4+
# Test postmaster start and stop state machine.
5+
6+
use strict;
7+
use warnings FATAL => 'all';
8+
use PostgreSQL::Test::Cluster;
9+
use PostgreSQL::Test::Utils;
10+
use Test::More;
11+
12+
#
13+
# Test that dead-end backends don't prevent the server from shutting
14+
# down.
15+
#
16+
# Dead-end backends can linger until they reach
17+
# authentication_timeout. We use a long authentication_timeout and a
18+
# much shorter timeout for the "pg_ctl stop" operation, to test that
19+
# dead-end backends are killed at fast shutdown. If they're not,
20+
# "pg_ctl stop" will error out before the authentication timeout kicks
21+
# in and cleans up the dead-end backends.
22+
my $authentication_timeout = $PostgreSQL::Test::Utils::timeout_default;
23+
my $stop_timeout = $authentication_timeout / 2;
24+
25+
# Initialize the server with low connection limits, to test dead-end backends
26+
my $node = PostgreSQL::Test::Cluster->new('main');
27+
$node->init;
28+
$node->append_conf('postgresql.conf', "max_connections = 5");
29+
$node->append_conf('postgresql.conf', "max_wal_senders = 0");
30+
$node->append_conf('postgresql.conf', "autovacuum_max_workers = 1");
31+
$node->append_conf('postgresql.conf', "max_worker_processes = 1");
32+
$node->append_conf('postgresql.conf', "log_connections = on");
33+
$node->append_conf('postgresql.conf', "log_min_messages = debug2");
34+
$node->append_conf('postgresql.conf',
35+
"authentication_timeout = '$authentication_timeout s'");
36+
$node->append_conf('postgresql.conf', 'trace_connection_negotiation=on');
37+
$node->start;
38+
39+
if (!$node->raw_connect_works())
40+
{
41+
plan skip_all => "this test requires working raw_connect()";
42+
}
43+
44+
my @raw_connections = ();
45+
46+
# Open a lot of TCP (or Unix domain socket) connections to use up all
47+
# the connection slots. Beyond a certain number (roughly 2x
48+
# max_connections), they will be "dead-end backends".
49+
for (my $i = 0; $i <= 20; $i++)
50+
{
51+
my $sock = $node->raw_connect();
52+
53+
# On a busy system, the server might reject connections if
54+
# postmaster cannot accept() them fast enough. The exact limit
55+
# and behavior depends on the platform. To make this reliable,
56+
# we attempt SSL negotiation on each connection before opening
57+
# the next one. The server will reject the SSL negotiations, but
58+
# when it does so, we know that the backend has been launched
59+
# and we should be able to open another connection.
60+
61+
# SSLRequest packet consists of packet length followed by
62+
# NEGOTIATE_SSL_CODE.
63+
my $negotiate_ssl_code = pack("Nnn", 8, 1234, 5679);
64+
my $sent = $sock->send($negotiate_ssl_code);
65+
66+
# Read reply. We expect the server to reject it with 'N'
67+
my $reply = "";
68+
$sock->recv($reply, 1);
69+
is($reply, "N", "dead-end connection $i");
70+
71+
push(@raw_connections, $sock);
72+
}
73+
74+
# When all the connection slots are in use, new connections will fail
75+
# before even looking up the user. Hence you now get "sorry, too many
76+
# clients already" instead of "role does not exist" error. Test that
77+
# to ensure that we have used up all the slots.
78+
$node->connect_fails("dbname=postgres user=invalid_user",
79+
"connect ",
80+
expected_stderr => qr/FATAL: sorry, too many clients already/);
81+
82+
# Open one more connection, to really ensure that we have at least one
83+
# dead-end backend.
84+
my $sock = $node->raw_connect();
85+
86+
# Test that the dead-end backends don't prevent the server from stopping.
87+
$node->stop('fast', timeout => $stop_timeout);
88+
89+
$node->start();
90+
$node->connect_ok("dbname=postgres", "works after restart");
91+
92+
# Clean up
93+
foreach my $socket (@raw_connections)
94+
{
95+
$socket->close();
96+
}
97+
98+
done_testing();

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy