Skip to content

Commit 966970e

Browse files
committed
Re-revert stats collector latch changes.
This reverts commit cb2f287, restoring the latch-ified stats collector logic. We'll soon see if this works any better on the Windows buildfarm machines.
1 parent b85427f commit 966970e

File tree

1 file changed

+68
-117
lines changed

1 file changed

+68
-117
lines changed

src/backend/postmaster/pgstat.c

Lines changed: 68 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,6 @@
2828
#include <arpa/inet.h>
2929
#include <signal.h>
3030
#include <time.h>
31-
#ifdef HAVE_POLL_H
32-
#include <poll.h>
33-
#endif
34-
#ifdef HAVE_SYS_POLL_H
35-
#include <sys/poll.h>
36-
#endif
3731

3832
#include "pgstat.h"
3933

@@ -55,8 +49,8 @@
5549
#include "storage/backendid.h"
5650
#include "storage/fd.h"
5751
#include "storage/ipc.h"
52+
#include "storage/latch.h"
5853
#include "storage/pg_shmem.h"
59-
#include "storage/pmsignal.h"
6054
#include "storage/procsignal.h"
6155
#include "utils/ascii.h"
6256
#include "utils/guc.h"
@@ -94,9 +88,6 @@
9488
* failed statistics collector; in
9589
* seconds. */
9690

97-
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
98-
* death; in seconds. */
99-
10091
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
10192
#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
10293

@@ -139,6 +130,8 @@ PgStat_MsgBgWriter BgWriterStats;
139130
*/
140131
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
141132

133+
static Latch pgStatLatch;
134+
142135
static struct sockaddr_storage pgStatAddr;
143136

144137
static time_t last_pgstat_start_time;
@@ -3009,15 +3002,7 @@ PgstatCollectorMain(int argc, char *argv[])
30093002
{
30103003
int len;
30113004
PgStat_Msg msg;
3012-
3013-
#ifndef WIN32
3014-
#ifdef HAVE_POLL
3015-
struct pollfd input_fd;
3016-
#else
3017-
struct timeval sel_timeout;
3018-
fd_set rfds;
3019-
#endif
3020-
#endif
3005+
int wr;
30213006

30223007
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
30233008

@@ -3036,9 +3021,13 @@ PgstatCollectorMain(int argc, char *argv[])
30363021
elog(FATAL, "setsid() failed: %m");
30373022
#endif
30383023

3024+
/* Initialize private latch for use by signal handlers */
3025+
InitLatch(&pgStatLatch);
3026+
30393027
/*
30403028
* Ignore all signals usually bound to some action in the postmaster,
3041-
* except SIGQUIT.
3029+
* except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3030+
* support latch operations, because pgStatLatch is local not shared.
30423031
*/
30433032
pqsignal(SIGHUP, pgstat_sighup_handler);
30443033
pqsignal(SIGINT, SIG_IGN);
@@ -3073,26 +3062,24 @@ PgstatCollectorMain(int argc, char *argv[])
30733062
pgStatRunningInCollector = true;
30743063
pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
30753064

3076-
/*
3077-
* Setup the descriptor set for select(2). Since only one bit in the set
3078-
* ever changes, we need not repeat FD_ZERO each time.
3079-
*/
3080-
#if !defined(HAVE_POLL) && !defined(WIN32)
3081-
FD_ZERO(&rfds);
3082-
#endif
3083-
30843065
/*
30853066
* Loop to process messages until we get SIGQUIT or detect ungraceful
30863067
* death of our parent postmaster.
30873068
*
3088-
* For performance reasons, we don't want to do a PostmasterIsAlive() test
3089-
* after every message; instead, do it only when select()/poll() is
3090-
* interrupted by timeout. In essence, we'll stay alive as long as
3091-
* backends keep sending us stuff often, even if the postmaster is gone.
3069+
* For performance reasons, we don't want to do ResetLatch/WaitLatch after
3070+
* every message; instead, do that only after a recv() fails to obtain a
3071+
* message. (This effectively means that if backends are sending us stuff
3072+
* like mad, we won't notice postmaster death until things slack off a
3073+
* bit; which seems fine.) To do that, we have an inner loop that
3074+
* iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3075+
* the inner loop, which means that such interrupts will get serviced but
3076+
* the latch won't get cleared until next time there is a break in the
3077+
* action.
30923078
*/
30933079
for (;;)
30943080
{
3095-
int got_data;
3081+
/* Clear any already-pending wakeups */
3082+
ResetLatch(&pgStatLatch);
30963083

30973084
/*
30983085
* Quit if we get SIGQUIT from the postmaster.
@@ -3101,87 +3088,37 @@ PgstatCollectorMain(int argc, char *argv[])
31013088
break;
31023089

31033090
/*
3104-
* Reload configuration if we got SIGHUP from the postmaster.
3105-
*/
3106-
if (got_SIGHUP)
3107-
{
3108-
ProcessConfigFile(PGC_SIGHUP);
3109-
got_SIGHUP = false;
3110-
}
3111-
3112-
/*
3113-
* Write the stats file if a new request has arrived that is not
3114-
* satisfied by existing file.
3091+
* Inner loop iterates as long as we keep getting messages, or until
3092+
* need_exit becomes set.
31153093
*/
3116-
if (last_statwrite < last_statrequest)
3117-
pgstat_write_statsfile(false);
3118-
3119-
/*
3120-
* Wait for a message to arrive; but not for more than
3121-
* PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
3122-
* shut down after an ungraceful postmaster termination; so it needn't
3123-
* be very fast. However, on some systems SIGQUIT won't interrupt the
3124-
* poll/select call, so this also limits speed of response to SIGQUIT,
3125-
* which is more important.)
3126-
*
3127-
* We use poll(2) if available, otherwise select(2). Win32 has its own
3128-
* implementation.
3129-
*/
3130-
#ifndef WIN32
3131-
#ifdef HAVE_POLL
3132-
input_fd.fd = pgStatSock;
3133-
input_fd.events = POLLIN | POLLERR;
3134-
input_fd.revents = 0;
3135-
3136-
if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
3137-
{
3138-
if (errno == EINTR)
3139-
continue;
3140-
ereport(ERROR,
3141-
(errcode_for_socket_access(),
3142-
errmsg("poll() failed in statistics collector: %m")));
3143-
}
3144-
3145-
got_data = (input_fd.revents != 0);
3146-
#else /* !HAVE_POLL */
3147-
3148-
FD_SET(pgStatSock, &rfds);
3149-
3150-
/*
3151-
* timeout struct is modified by select() on some operating systems,
3152-
* so re-fill it each time.
3153-
*/
3154-
sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
3155-
sel_timeout.tv_usec = 0;
3156-
3157-
if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
3094+
while (!need_exit)
31583095
{
3159-
if (errno == EINTR)
3160-
continue;
3161-
ereport(ERROR,
3162-
(errcode_for_socket_access(),
3163-
errmsg("select() failed in statistics collector: %m")));
3164-
}
3096+
/*
3097+
* Reload configuration if we got SIGHUP from the postmaster.
3098+
*/
3099+
if (got_SIGHUP)
3100+
{
3101+
got_SIGHUP = false;
3102+
ProcessConfigFile(PGC_SIGHUP);
3103+
}
31653104

3166-
got_data = FD_ISSET(pgStatSock, &rfds);
3167-
#endif /* HAVE_POLL */
3168-
#else /* WIN32 */
3169-
got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
3170-
PGSTAT_SELECT_TIMEOUT * 1000);
3171-
#endif
3105+
/*
3106+
* Write the stats file if a new request has arrived that is not
3107+
* satisfied by existing file.
3108+
*/
3109+
if (last_statwrite < last_statrequest)
3110+
pgstat_write_statsfile(false);
31723111

3173-
/*
3174-
* If there is a message on the socket, read it and check for
3175-
* validity.
3176-
*/
3177-
if (got_data)
3178-
{
3112+
/*
3113+
* Try to receive and process a message. This will not block,
3114+
* since the socket is set to non-blocking mode.
3115+
*/
31793116
len = recv(pgStatSock, (char *) &msg,
31803117
sizeof(PgStat_Msg), 0);
31813118
if (len < 0)
31823119
{
3183-
if (errno == EINTR)
3184-
continue;
3120+
if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
3121+
break; /* out of inner loop */
31853122
ereport(ERROR,
31863123
(errcode_for_socket_access(),
31873124
errmsg("could not read statistics message: %m")));
@@ -3279,17 +3216,21 @@ PgstatCollectorMain(int argc, char *argv[])
32793216
default:
32803217
break;
32813218
}
3282-
}
3283-
else
3284-
{
3285-
/*
3286-
* We can only get here if the select/poll timeout elapsed. Check
3287-
* for postmaster death.
3288-
*/
3289-
if (!PostmasterIsAlive())
3290-
break;
3291-
}
3292-
} /* end of message-processing loop */
3219+
} /* end of inner message-processing loop */
3220+
3221+
/* Sleep until there's something to do */
3222+
wr = WaitLatchOrSocket(&pgStatLatch,
3223+
WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
3224+
pgStatSock,
3225+
-1L);
3226+
3227+
/*
3228+
* Emergency bailout if postmaster has died. This is to avoid the
3229+
* necessity for manual cleanup of all postmaster children.
3230+
*/
3231+
if (wr & WL_POSTMASTER_DEATH)
3232+
break;
3233+
} /* end of outer loop */
32933234

32943235
/*
32953236
* Save the final stats to reuse at next startup.
@@ -3304,14 +3245,24 @@ PgstatCollectorMain(int argc, char *argv[])
33043245
static void
33053246
pgstat_exit(SIGNAL_ARGS)
33063247
{
3248+
int save_errno = errno;
3249+
33073250
need_exit = true;
3251+
SetLatch(&pgStatLatch);
3252+
3253+
errno = save_errno;
33083254
}
33093255

33103256
/* SIGHUP handler for collector process */
33113257
static void
33123258
pgstat_sighup_handler(SIGNAL_ARGS)
33133259
{
3260+
int save_errno = errno;
3261+
33143262
got_SIGHUP = true;
3263+
SetLatch(&pgStatLatch);
3264+
3265+
errno = save_errno;
33153266
}
33163267

33173268

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy