Skip to content

Commit c30f54a

Browse files
committed
Detect POLLHUP/POLLRDHUP while running queries.
Provide a new GUC client_connection_check_interval that can be used to check whether the client connection has gone away, while running very long queries. It is disabled by default. For now this uses a non-standard Linux extension (also adopted by at least one other OS). POLLRDHUP is not defined by POSIX, and other OSes don't have a reliable way to know if a connection was closed without actually trying to read or write. In future we might consider trying to send a no-op/heartbeat message instead, but that could require protocol changes. Author: Sergey Cherkashin <s.cherkashin@postgrespro.ru> Author: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Tatsuo Ishii <ishii@sraoss.co.jp> Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Maksim Milyutin <milyutinma@gmail.com> Reviewed-by: Tsunakawa, Takayuki/綱川 貴之 <tsunakawa.takay@fujitsu.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> (much earlier version) Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru
1 parent 174edbe commit c30f54a

File tree

11 files changed

+156
-0
lines changed

11 files changed

+156
-0
lines changed

doc/src/sgml/config.sgml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -998,6 +998,43 @@ include_dir 'conf.d'
998998
</listitem>
999999
</varlistentry>
10001000

1001+
<varlistentry id="guc-client-connection-check-interval" xreflabel="client_connection_check_interval">
1002+
<term><varname>client_connection_check_interval</varname> (<type>integer</type>)
1003+
<indexterm>
1004+
<primary><varname>client_connection_check_interval</varname> configuration parameter</primary>
1005+
</indexterm>
1006+
</term>
1007+
<listitem>
1008+
<para>
1009+
Sets the time interval between optional checks that the client is still
1010+
connected, while running queries. The check is performed by polling
1011+
the socket, and allows long-running queries to be aborted sooner if
1012+
the kernel reports that the connection is closed.
1013+
</para>
1014+
<para>
1015+
This option is currently available only on systems that support the
1016+
non-standard <symbol>POLLRDHUP</symbol> extension to the
1017+
<symbol>poll</symbol> system call, including Linux.
1018+
</para>
1019+
<para>
1020+
If the value is specified without units, it is taken as milliseconds.
1021+
The default value is <literal>0</literal>, which disables connection
1022+
checks. Without connection checks, the server will detect the loss of
1023+
the connection only at the next interaction with the socket, when it
1024+
waits for, receives or sends data.
1025+
</para>
1026+
<para>
1027+
For the kernel itself to detect lost TCP connections reliably and within
1028+
a known timeframe in all scenarios including network failure, it may
1029+
also be necessary to adjust the TCP keepalive settings of the operating
1030+
system, or the <xref linkend="guc-tcp-keepalives-idle"/>,
1031+
<xref linkend="guc-tcp-keepalives-interval"/> and
1032+
<xref linkend="guc-tcp-keepalives-count"/> settings of
1033+
<productname>PostgreSQL</productname>.
1034+
</para>
1035+
</listitem>
1036+
</varlistentry>
1037+
10011038
</variablelist>
10021039
</sect2>
10031040

src/backend/libpq/pqcomm.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
*/
5555
#include "postgres.h"
5656

57+
#ifdef HAVE_POLL_H
58+
#include <poll.h>
59+
#endif
5760
#include <signal.h>
5861
#include <fcntl.h>
5962
#include <grp.h>
@@ -1921,3 +1924,40 @@ pq_settcpusertimeout(int timeout, Port *port)
19211924

19221925
return STATUS_OK;
19231926
}
1927+
1928+
/*
1929+
* Check if the client is still connected.
1930+
*/
1931+
bool
1932+
pq_check_connection(void)
1933+
{
1934+
#if defined(POLLRDHUP)
1935+
/*
1936+
* POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by
1937+
* the other end. We don't have a portable way to do that without
1938+
* actually trying to read or write data on other systems. We don't want
1939+
* to read because that would be confused by pipelined queries and COPY
1940+
* data. Perhaps in future we'll try to write a heartbeat message instead.
1941+
*/
1942+
struct pollfd pollfd;
1943+
int rc;
1944+
1945+
pollfd.fd = MyProcPort->sock;
1946+
pollfd.events = POLLOUT | POLLIN | POLLRDHUP;
1947+
pollfd.revents = 0;
1948+
1949+
rc = poll(&pollfd, 1, 0);
1950+
1951+
if (rc < 0)
1952+
{
1953+
ereport(COMMERROR,
1954+
(errcode_for_socket_access(),
1955+
errmsg("could not poll socket: %m")));
1956+
return false;
1957+
}
1958+
else if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP)))
1959+
return false;
1960+
#endif
1961+
1962+
return true;
1963+
}

src/backend/tcop/postgres.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ int max_stack_depth = 100;
102102
/* wait N seconds to allow attach from a debugger */
103103
int PostAuthDelay = 0;
104104

105+
/* Time between checks that the client is still connected. */
106+
int client_connection_check_interval = 0;
107+
105108
/* ----------------
106109
* private typedefs etc
107110
* ----------------
@@ -2671,6 +2674,14 @@ start_xact_command(void)
26712674
* not desired, the timeout has to be disabled explicitly.
26722675
*/
26732676
enable_statement_timeout();
2677+
2678+
/* If necessary, start the timeout for checking whether the client has gone away. */
2679+
if (client_connection_check_interval > 0 &&
2680+
IsUnderPostmaster &&
2681+
MyProcPort &&
2682+
!get_timeout_active(CLIENT_CONNECTION_CHECK_TIMEOUT))
2683+
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
2684+
client_connection_check_interval);
26742685
}
26752686

26762687
static void
@@ -3149,6 +3160,27 @@ ProcessInterrupts(void)
31493160
(errcode(ERRCODE_ADMIN_SHUTDOWN),
31503161
errmsg("terminating connection due to administrator command")));
31513162
}
3163+
3164+
if (CheckClientConnectionPending)
3165+
{
3166+
CheckClientConnectionPending = false;
3167+
3168+
/*
3169+
* Check for lost connection and re-arm, if still configured, but not
3170+
* if we've arrived back at DoingCommandRead state. We don't want to
3171+
* wake up idle sessions, and they already know how to detect lost
3172+
* connections.
3173+
*/
3174+
if (!DoingCommandRead && client_connection_check_interval > 0)
3175+
{
3176+
if (!pq_check_connection())
3177+
ClientConnectionLost = true;
3178+
else
3179+
enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
3180+
client_connection_check_interval);
3181+
}
3182+
}
3183+
31523184
if (ClientConnectionLost)
31533185
{
31543186
QueryCancelPending = false; /* lost connection trumps QueryCancel */

src/backend/utils/init/globals.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ ProtocolVersion FrontendProtocol;
3030
volatile sig_atomic_t InterruptPending = false;
3131
volatile sig_atomic_t QueryCancelPending = false;
3232
volatile sig_atomic_t ProcDiePending = false;
33+
volatile sig_atomic_t CheckClientConnectionPending = false;
3334
volatile sig_atomic_t ClientConnectionLost = false;
3435
volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false;
3536
volatile sig_atomic_t IdleSessionTimeoutPending = false;

src/backend/utils/init/postinit.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ static void StatementTimeoutHandler(void);
7373
static void LockTimeoutHandler(void);
7474
static void IdleInTransactionSessionTimeoutHandler(void);
7575
static void IdleSessionTimeoutHandler(void);
76+
static void ClientCheckTimeoutHandler(void);
7677
static bool ThereIsAtLeastOneRole(void);
7778
static void process_startup_options(Port *port, bool am_superuser);
7879
static void process_settings(Oid databaseid, Oid roleid);
@@ -620,6 +621,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
620621
RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
621622
IdleInTransactionSessionTimeoutHandler);
622623
RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
624+
RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
623625
}
624626

625627
/*
@@ -1242,6 +1244,14 @@ IdleSessionTimeoutHandler(void)
12421244
SetLatch(MyLatch);
12431245
}
12441246

1247+
static void
1248+
ClientCheckTimeoutHandler(void)
1249+
{
1250+
CheckClientConnectionPending = true;
1251+
InterruptPending = true;
1252+
SetLatch(MyLatch);
1253+
}
1254+
12451255
/*
12461256
* Returns true if at least one role is defined in this database cluster.
12471257
*/

src/backend/utils/misc/guc.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
#include <float.h>
2121
#include <math.h>
2222
#include <limits.h>
23+
#ifdef HAVE_POLL_H
24+
#include <poll.h>
25+
#endif
2326
#ifndef WIN32
2427
#include <sys/mman.h>
2528
#endif
@@ -204,6 +207,7 @@ static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource sourc
204207
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
205208
static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
206209
static bool check_huge_page_size(int *newval, void **extra, GucSource source);
210+
static bool check_client_connection_check_interval(int *newval, void **extra, GucSource source);
207211
static void assign_pgstat_temp_directory(const char *newval, void *extra);
208212
static bool check_application_name(char **newval, void **extra, GucSource source);
209213
static void assign_application_name(const char *newval, void *extra);
@@ -3501,6 +3505,17 @@ static struct config_int ConfigureNamesInt[] =
35013505
NULL, NULL, NULL
35023506
},
35033507

3508+
{
3509+
{"client_connection_check_interval", PGC_USERSET, CLIENT_CONN_OTHER,
3510+
gettext_noop("Sets the time interval between checks for disconnection while running queries."),
3511+
NULL,
3512+
GUC_UNIT_MS
3513+
},
3514+
&client_connection_check_interval,
3515+
0, 0, INT_MAX,
3516+
check_client_connection_check_interval, NULL, NULL
3517+
},
3518+
35043519
/* End-of-list marker */
35053520
{
35063521
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
@@ -11980,6 +11995,20 @@ check_huge_page_size(int *newval, void **extra, GucSource source)
1198011995
return true;
1198111996
}
1198211997

11998+
static bool
11999+
check_client_connection_check_interval(int *newval, void **extra, GucSource source)
12000+
{
12001+
#ifndef POLLRDHUP
12002+
/* Linux only, for now. See pq_check_connection(). */
12003+
if (*newval != 0)
12004+
{
12005+
GUC_check_errdetail("client_connection_check_interval must be set to 0 on platforms that lack POLLRDHUP.");
12006+
return false;
12007+
}
12008+
#endif
12009+
return true;
12010+
}
12011+
1198312012
static void
1198412013
assign_pgstat_temp_directory(const char *newval, void *extra)
1198512014
{

src/backend/utils/misc/postgresql.conf.sample

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,9 @@
719719

720720
#dynamic_library_path = '$libdir'
721721

722+
#client_connection_check_interval = 0 # time between checks for client
723+
# disconnection while running queries;
724+
# 0 for never
722725

723726
#------------------------------------------------------------------------------
724727
# LOCK MANAGEMENT

src/include/libpq/libpq.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ extern int pq_getbyte(void);
7171
extern int pq_peekbyte(void);
7272
extern int pq_getbyte_if_available(unsigned char *c);
7373
extern int pq_putmessage_v2(char msgtype, const char *s, size_t len);
74+
extern bool pq_check_connection(void);
7475

7576
/*
7677
* prototypes for functions in be-secure.c

src/include/miscadmin.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
8585
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
8686
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
8787

88+
extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
8889
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
8990

9091
/* these are marked volatile because they are examined by signal handlers: */

src/include/tcop/tcopprot.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ extern CommandDest whereToSendOutput;
2929
extern PGDLLIMPORT const char *debug_query_string;
3030
extern int max_stack_depth;
3131
extern int PostAuthDelay;
32+
extern int client_connection_check_interval;
3233

3334
/* GUC-configurable parameters */
3435

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy