Skip to content

Commit 969d7cd

Browse files
committed
Install a "dead man switch" to allow the postmaster to detect cases where
a backend has done exit(0) or exit(1) without having disengaged itself from shared memory. We are at risk for this whenever third-party code is loaded into a backend, since such code might not know it's supposed to go through proc_exit() instead. Also, it is reported that under Windows there are ways to externally kill a process that cause the status code returned to the postmaster to be indistinguishable from a voluntary exit (thank you, Microsoft). If this does happen then the system is probably hosed --- for instance, the dead session might still be holding locks. So the best recovery method is to treat this like a backend crash.

The dead man switch is armed for a particular child process when it acquires a regular PGPROC, and disarmed when the PGPROC is released; these should be the first and last touches of shared memory resources in a backend, or close enough anyway. This choice means there is no coverage for auxiliary processes, but I doubt we need that, since they shouldn't be executing any user-provided code anyway.

This patch also improves the management of the EXEC_BACKEND ShmemBackendArray array a bit, by reducing search costs.

Although this problem is of long standing, the lack of field complaints seems to mean it's not critical enough to risk back-patching; at least not till we get some more testing of this mechanism.
1 parent 8f34811 commit 969d7cd

File tree

8 files changed

+280
-92
lines changed

8 files changed

+280
-92
lines changed

src/backend/postmaster/postmaster.c

Lines changed: 97 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*
3838
*
3939
* IDENTIFICATION
40-
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.580 2009/05/04 02:46:36 tgl Exp $
40+
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.581 2009/05/05 19:59:00 tgl Exp $
4141
*
4242
* NOTES
4343
*
@@ -135,12 +135,14 @@
135135
* Also, "dead_end" children are in it: these are children launched just
136136
* for the purpose of sending a friendly rejection message to a would-be
137137
* client. We must track them because they are attached to shared memory,
138-
* but we know they will never become live backends.
138+
* but we know they will never become live backends. dead_end children are
139+
* not assigned a PMChildSlot.
139140
*/
140141
typedef struct bkend
141142
{
142143
pid_t pid; /* process id of backend */
143144
long cancel_key; /* cancel key for cancels for this backend */
145+
int child_slot; /* PMChildSlot for this backend, if any */
144146
bool is_autovacuum; /* is it an autovacuum process? */
145147
bool dead_end; /* is it going to send an error and quit? */
146148
Dlelem elem; /* list link in BackendList */
@@ -149,15 +151,6 @@ typedef struct bkend
149151
static Dllist *BackendList;
150152

151153
#ifdef EXEC_BACKEND
152-
/*
153-
* Number of entries in the shared-memory backend table. This table is used
154-
* only for sending cancels, and therefore only includes children we allow
155-
* cancels on: regular backends and autovac workers. In particular we exclude
156-
* dead_end children, allowing the table to have a known maximum size, to wit
157-
* the same too-many-children limit enforced by canAcceptConnections().
158-
*/
159-
#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
160-
161154
static Backend *ShmemBackendArray;
162155
#endif
163156

@@ -404,6 +397,7 @@ typedef struct
404397
char DataDir[MAXPGPATH];
405398
int ListenSocket[MAXLISTEN];
406399
long MyCancelKey;
400+
int MyPMChildSlot;
407401
unsigned long UsedShmemSegID;
408402
void *UsedShmemSegAddr;
409403
slock_t *ShmemLock;
@@ -413,6 +407,7 @@ typedef struct
413407
slock_t *ProcStructLock;
414408
PROC_HDR *ProcGlobal;
415409
PGPROC *AuxiliaryProcs;
410+
PMSignalData *PMSignalState;
416411
InheritableSocket pgStatSock;
417412
pid_t PostmasterPid;
418413
TimestampTz PgStartTime;
@@ -443,7 +438,7 @@ static bool save_backend_variables(BackendParameters * param, Port *port,
443438
#endif
444439

445440
static void ShmemBackendArrayAdd(Backend *bn);
446-
static void ShmemBackendArrayRemove(pid_t pid);
441+
static void ShmemBackendArrayRemove(Backend *bn);
447442
#endif /* EXEC_BACKEND */
448443

449444
#define StartupDataBase() StartChildProcess(StartupProcess)
@@ -1771,7 +1766,7 @@ processCancelRequest(Port *port, void *pkt)
17711766
{
17721767
bp = (Backend *) DLE_VAL(curr);
17731768
#else
1774-
for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
1769+
for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
17751770
{
17761771
bp = (Backend *) &ShmemBackendArray[i];
17771772
#endif
@@ -1836,10 +1831,10 @@ canAcceptConnections(void)
18361831
* MaxBackends limit is enforced when a new backend tries to join the
18371832
* shared-inval backend array.
18381833
*
1839-
* In the EXEC_BACKEND case, the limit here must match the size of the
1840-
* ShmemBackendArray, since all these processes will have cancel codes.
1834+
* The limit here must match the sizes of the per-child-process arrays;
1835+
* see comments for MaxLivePostmasterChildren().
18411836
*/
1842-
if (CountChildren() >= 2 * MaxBackends)
1837+
if (CountChildren() >= MaxLivePostmasterChildren())
18431838
return CAC_TOOMANY;
18441839

18451840
return CAC_OK;
@@ -2439,8 +2434,8 @@ CleanupBackend(int pid,
24392434
/*
24402435
* If a backend dies in an ugly way then we must signal all other backends
24412436
* to quickdie. If exit status is zero (normal) or one (FATAL exit), we
2442-
* assume everything is all right and simply remove the backend from the
2443-
* active backend list.
2437+
* assume everything is all right and proceed to remove the backend from
2438+
* the active backend list.
24442439
*/
24452440
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
24462441
{
@@ -2454,10 +2449,21 @@ CleanupBackend(int pid,
24542449

24552450
if (bp->pid == pid)
24562451
{
2457-
#ifdef EXEC_BACKEND
24582452
if (!bp->dead_end)
2459-
ShmemBackendArrayRemove(pid);
2453+
{
2454+
if (!ReleasePostmasterChildSlot(bp->child_slot))
2455+
{
2456+
/*
2457+
* Uh-oh, the child failed to clean itself up. Treat
2458+
* as a crash after all.
2459+
*/
2460+
HandleChildCrash(pid, exitstatus, _("server process"));
2461+
return;
2462+
}
2463+
#ifdef EXEC_BACKEND
2464+
ShmemBackendArrayRemove(bp);
24602465
#endif
2466+
}
24612467
DLRemove(curr);
24622468
free(bp);
24632469
break;
@@ -2500,10 +2506,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
25002506
/*
25012507
* Found entry for freshly-dead backend, so remove it.
25022508
*/
2503-
#ifdef EXEC_BACKEND
25042509
if (!bp->dead_end)
2505-
ShmemBackendArrayRemove(pid);
2510+
{
2511+
(void) ReleasePostmasterChildSlot(bp->child_slot);
2512+
#ifdef EXEC_BACKEND
2513+
ShmemBackendArrayRemove(bp);
25062514
#endif
2515+
}
25072516
DLRemove(curr);
25082517
free(bp);
25092518
/* Keep looping so we can signal remaining backends */
@@ -2931,14 +2940,7 @@ BackendStartup(Port *port)
29312940
pid_t pid;
29322941

29332942
/*
2934-
* Compute the cancel key that will be assigned to this backend. The
2935-
* backend will have its own copy in the forked-off process' value of
2936-
* MyCancelKey, so that it can transmit the key to the frontend.
2937-
*/
2938-
MyCancelKey = PostmasterRandom();
2939-
2940-
/*
2941-
* Make room for backend data structure. Better before the fork() so we
2943+
* Create backend data structure. Better before the fork() so we
29422944
* can handle failure cleanly.
29432945
*/
29442946
bn = (Backend *) malloc(sizeof(Backend));
@@ -2950,8 +2952,26 @@ BackendStartup(Port *port)
29502952
return STATUS_ERROR;
29512953
}
29522954

2955+
/*
2956+
* Compute the cancel key that will be assigned to this backend. The
2957+
* backend will have its own copy in the forked-off process' value of
2958+
* MyCancelKey, so that it can transmit the key to the frontend.
2959+
*/
2960+
MyCancelKey = PostmasterRandom();
2961+
bn->cancel_key = MyCancelKey;
2962+
29532963
/* Pass down canAcceptConnections state */
29542964
port->canAcceptConnections = canAcceptConnections();
2965+
bn->dead_end = (port->canAcceptConnections != CAC_OK &&
2966+
port->canAcceptConnections != CAC_WAITBACKUP);
2967+
2968+
/*
2969+
* Unless it's a dead_end child, assign it a child slot number
2970+
*/
2971+
if (!bn->dead_end)
2972+
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
2973+
else
2974+
bn->child_slot = 0;
29552975

29562976
#ifdef EXEC_BACKEND
29572977
pid = backend_forkexec(port);
@@ -3009,10 +3029,7 @@ BackendStartup(Port *port)
30093029
* of backends.
30103030
*/
30113031
bn->pid = pid;
3012-
bn->cancel_key = MyCancelKey;
30133032
bn->is_autovacuum = false;
3014-
bn->dead_end = (port->canAcceptConnections != CAC_OK &&
3015-
port->canAcceptConnections != CAC_WAITBACKUP);
30163033
DLInitElem(&bn->elem, bn);
30173034
DLAddHead(BackendList, &bn->elem);
30183035
#ifdef EXEC_BACKEND
@@ -4271,23 +4288,26 @@ StartAutovacuumWorker(void)
42714288
*/
42724289
if (canAcceptConnections() == CAC_OK)
42734290
{
4274-
/*
4275-
* Compute the cancel key that will be assigned to this session. We
4276-
* probably don't need cancel keys for autovac workers, but we'd
4277-
* better have something random in the field to prevent unfriendly
4278-
* people from sending cancels to them.
4279-
*/
4280-
MyCancelKey = PostmasterRandom();
4281-
42824291
bn = (Backend *) malloc(sizeof(Backend));
42834292
if (bn)
42844293
{
4294+
/*
4295+
* Compute the cancel key that will be assigned to this session. We
4296+
* probably don't need cancel keys for autovac workers, but we'd
4297+
* better have something random in the field to prevent unfriendly
4298+
* people from sending cancels to them.
4299+
*/
4300+
MyCancelKey = PostmasterRandom();
4301+
bn->cancel_key = MyCancelKey;
4302+
4303+
/* Autovac workers are not dead_end and need a child slot */
4304+
bn->dead_end = false;
4305+
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4306+
42854307
bn->pid = StartAutoVacWorker();
42864308
if (bn->pid > 0)
42874309
{
4288-
bn->cancel_key = MyCancelKey;
42894310
bn->is_autovacuum = true;
4290-
bn->dead_end = false;
42914311
DLInitElem(&bn->elem, bn);
42924312
DLAddHead(BackendList, &bn->elem);
42934313
#ifdef EXEC_BACKEND
@@ -4353,6 +4373,24 @@ CreateOptsFile(int argc, char *argv[], char *fullprogname)
43534373
}
43544374

43554375

4376+
/*
4377+
* MaxLivePostmasterChildren
4378+
*
4379+
* This reports the number of entries needed in per-child-process arrays
4380+
* (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
4381+
* These arrays include regular backends and autovac workers, but not special
4382+
* children nor dead_end children. This allows the arrays to have a fixed
4383+
* maximum size, to wit the same too-many-children limit enforced by
4384+
* canAcceptConnections(). The exact value isn't too critical as long as
4385+
* it's more than MaxBackends.
4386+
*/
4387+
int
4388+
MaxLivePostmasterChildren(void)
4389+
{
4390+
return 2 * MaxBackends;
4391+
}
4392+
4393+
43564394
#ifdef EXEC_BACKEND
43574395

43584396
/*
@@ -4364,6 +4402,7 @@ extern LWLock *LWLockArray;
43644402
extern slock_t *ProcStructLock;
43654403
extern PROC_HDR *ProcGlobal;
43664404
extern PGPROC *AuxiliaryProcs;
4405+
extern PMSignalData *PMSignalState;
43674406
extern int pgStatSock;
43684407

43694408
#ifndef WIN32
@@ -4395,6 +4434,7 @@ save_backend_variables(BackendParameters * param, Port *port,
43954434
memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
43964435

43974436
param->MyCancelKey = MyCancelKey;
4437+
param->MyPMChildSlot = MyPMChildSlot;
43984438

43994439
param->UsedShmemSegID = UsedShmemSegID;
44004440
param->UsedShmemSegAddr = UsedShmemSegAddr;
@@ -4407,6 +4447,7 @@ save_backend_variables(BackendParameters * param, Port *port,
44074447
param->ProcStructLock = ProcStructLock;
44084448
param->ProcGlobal = ProcGlobal;
44094449
param->AuxiliaryProcs = AuxiliaryProcs;
4450+
param->PMSignalState = PMSignalState;
44104451
write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid);
44114452

44124453
param->PostmasterPid = PostmasterPid;
@@ -4601,6 +4642,7 @@ restore_backend_variables(BackendParameters * param, Port *port)
46014642
memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
46024643

46034644
MyCancelKey = param->MyCancelKey;
4645+
MyPMChildSlot = param->MyPMChildSlot;
46044646

46054647
UsedShmemSegID = param->UsedShmemSegID;
46064648
UsedShmemSegAddr = param->UsedShmemSegAddr;
@@ -4613,6 +4655,7 @@ restore_backend_variables(BackendParameters * param, Port *port)
46134655
ProcStructLock = param->ProcStructLock;
46144656
ProcGlobal = param->ProcGlobal;
46154657
AuxiliaryProcs = param->AuxiliaryProcs;
4658+
PMSignalState = param->PMSignalState;
46164659
read_inheritable_socket(&pgStatSock, &param->pgStatSock);
46174660

46184661
PostmasterPid = param->PostmasterPid;
@@ -4642,7 +4685,7 @@ restore_backend_variables(BackendParameters * param, Port *port)
46424685
Size
46434686
ShmemBackendArraySize(void)
46444687
{
4645-
return mul_size(NUM_BACKENDARRAY_ELEMS, sizeof(Backend));
4688+
return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
46464689
}
46474690

46484691
void
@@ -4658,41 +4701,23 @@ ShmemBackendArrayAllocation(void)
46584701
static void
46594702
ShmemBackendArrayAdd(Backend *bn)
46604703
{
4661-
int i;
4662-
4663-
/* Find an empty slot */
4664-
for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
4665-
{
4666-
if (ShmemBackendArray[i].pid == 0)
4667-
{
4668-
ShmemBackendArray[i] = *bn;
4669-
return;
4670-
}
4671-
}
4704+
/* The array slot corresponding to my PMChildSlot should be free */
4705+
int i = bn->child_slot - 1;
46724706

4673-
ereport(FATAL,
4674-
(errmsg_internal("no free slots in shmem backend array")));
4707+
Assert(ShmemBackendArray[i].pid == 0);
4708+
ShmemBackendArray[i] = *bn;
46754709
}
46764710

46774711
static void
4678-
ShmemBackendArrayRemove(pid_t pid)
4712+
ShmemBackendArrayRemove(Backend *bn)
46794713
{
4680-
int i;
4681-
4682-
for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
4683-
{
4684-
if (ShmemBackendArray[i].pid == pid)
4685-
{
4686-
/* Mark the slot as empty */
4687-
ShmemBackendArray[i].pid = 0;
4688-
return;
4689-
}
4690-
}
4714+
int i = bn->child_slot - 1;
46914715

4692-
ereport(WARNING,
4693-
(errmsg_internal("could not find backend entry with pid %d",
4694-
(int) pid)));
4716+
Assert(ShmemBackendArray[i].pid == bn->pid);
4717+
/* Mark the slot as empty */
4718+
ShmemBackendArray[i].pid = 0;
46954719
}
4720+
46964721
#endif /* EXEC_BACKEND */
46974722

46984723

src/backend/storage/ipc/ipci.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.99 2009/01/03 17:08:39 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.100 2009/05/05 19:59:00 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -111,6 +111,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
111111
size = add_size(size, ProcArrayShmemSize());
112112
size = add_size(size, BackendStatusShmemSize());
113113
size = add_size(size, SInvalShmemSize());
114+
size = add_size(size, PMSignalShmemSize());
114115
size = add_size(size, BgWriterShmemSize());
115116
size = add_size(size, AutoVacuumShmemSize());
116117
size = add_size(size, BTreeShmemSize());
@@ -206,7 +207,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
206207
/*
207208
* Set up interprocess signaling mechanisms
208209
*/
209-
PMSignalInit();
210+
PMSignalShmemInit();
210211
BgWriterShmemInit();
211212
AutoVacuumShmemInit();
212213

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy