Skip to content

Commit 55b454d

Browse files
anarazel and macdice committed
aio: Infrastructure for io_method=worker
This commit contains the basic, system-wide, infrastructure for io_method=worker. It does not yet actually execute IO; this commit just provides the infrastructure for running IO workers, kept separate for easier review.

The number of IO workers can be adjusted with a PGC_SIGHUP GUC. Eventually we'd like to make the number of workers dynamically scale up/down based on the current "IO load".

To allow the number of IO workers to be increased without a restart, we need to reserve PGPROC entries for the workers unconditionally. This has been judged to be worth the cost. If it turns out to be problematic, we can introduce a PGC_POSTMASTER GUC to control the maximum number.

As io workers might be needed during shutdown, e.g. for AIO during the shutdown checkpoint, a new PMState phase is added. IO workers are shut down after the shutdown checkpoint has been performed and walsender/archiver have shut down, but before the checkpointer itself shuts down. See also 87a6690.

Updates PGSTAT_FILE_FORMAT_ID due to the addition of a new BackendType.

Reviewed-by: Noah Misch <noah@leadboat.com>
Co-authored-by: Thomas Munro <thomas.munro@gmail.com>
Co-authored-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/uvrtrknj4kdytuboidbhwclo4gxhswwcpgadptsjvjqcluzmah%40brqs62irg4dt
Discussion: https://postgr.es/m/20210223100344.llw5an2aklengrmn@alap3.anarazel.de
Discussion: https://postgr.es/m/stj36ea6yyhoxtqkhpieia2z4krnam7qyetc57rfezgk4zgapf@gcnactj4z56m
1 parent 549ea06 commit 55b454d

File tree

20 files changed

+342
-15
lines changed

20 files changed

+342
-15
lines changed

doc/src/sgml/config.sgml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2689,6 +2689,25 @@ include_dir 'conf.d'
26892689
</listitem>
26902690
</varlistentry>
26912691

2692+
<varlistentry id="guc-io-workers" xreflabel="io_workers">
2693+
<term><varname>io_workers</varname> (<type>int</type>)
2694+
<indexterm>
2695+
<primary><varname>io_workers</varname> configuration parameter</primary>
2696+
</indexterm>
2697+
</term>
2698+
<listitem>
2699+
<para>
2700+
Selects the number of I/O worker processes to use. The default is
2701+
3. This parameter can only be set in the
2702+
<filename>postgresql.conf</filename> file or on the server command
2703+
line.
2704+
</para>
2705+
<para>
2706+
Only has an effect if <xref linkend="guc-io-method"/> is set to
2707+
<literal>worker</literal>.
2708+
</para>
2709+
</listitem>
2710+
</varlistentry>
26922711
</variablelist>
26932712
</sect2>
26942713

src/backend/postmaster/launch_backend.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "replication/slotsync.h"
4949
#include "replication/walreceiver.h"
5050
#include "storage/dsm.h"
51+
#include "storage/io_worker.h"
5152
#include "storage/pg_shmem.h"
5253
#include "tcop/backend_startup.h"
5354
#include "utils/memutils.h"
@@ -197,6 +198,7 @@ static child_process_kind child_process_kinds[] = {
197198
[B_ARCHIVER] = {"archiver", PgArchiverMain, true},
198199
[B_BG_WRITER] = {"bgwriter", BackgroundWriterMain, true},
199200
[B_CHECKPOINTER] = {"checkpointer", CheckpointerMain, true},
201+
[B_IO_WORKER] = {"io_worker", IoWorkerMain, true},
200202
[B_STARTUP] = {"startup", StartupProcessMain, true},
201203
[B_WAL_RECEIVER] = {"wal_receiver", WalReceiverMain, true},
202204
[B_WAL_SUMMARIZER] = {"wal_summarizer", WalSummarizerMain, true},

src/backend/postmaster/pmchild.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ InitPostmasterChildSlots(void)
101101

102102
pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_worker_slots;
103103
pmchild_pools[B_BG_WORKER].size = max_worker_processes;
104+
pmchild_pools[B_IO_WORKER].size = MAX_IO_WORKERS;
104105

105106
/*
106107
* There can be only one of each of these running at a time. They each

src/backend/postmaster/postmaster.c

Lines changed: 162 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,12 @@
108108
#include "replication/logicallauncher.h"
109109
#include "replication/slotsync.h"
110110
#include "replication/walsender.h"
111+
#include "storage/aio_subsys.h"
111112
#include "storage/fd.h"
113+
#include "storage/io_worker.h"
112114
#include "storage/ipc.h"
113115
#include "storage/pmsignal.h"
116+
#include "storage/proc.h"
114117
#include "tcop/backend_startup.h"
115118
#include "tcop/tcopprot.h"
116119
#include "utils/datetime.h"
@@ -340,6 +343,7 @@ typedef enum
340343
* ckpt */
341344
PM_WAIT_XLOG_ARCHIVAL, /* waiting for archiver and walsenders to
342345
* finish */
346+
PM_WAIT_IO_WORKERS, /* waiting for io workers to exit */
343347
PM_WAIT_CHECKPOINTER, /* waiting for checkpointer to shut down */
344348
PM_WAIT_DEAD_END, /* waiting for dead-end children to exit */
345349
PM_NO_CHILDREN, /* all important children have exited */
@@ -402,6 +406,10 @@ bool LoadedSSL = false;
402406
static DNSServiceRef bonjour_sdref = NULL;
403407
#endif
404408

409+
/* State for IO worker management. */
410+
static int io_worker_count = 0;
411+
static PMChild *io_worker_children[MAX_IO_WORKERS];
412+
405413
/*
406414
* postmaster.c - function prototypes
407415
*/
@@ -436,6 +444,8 @@ static void TerminateChildren(int signal);
436444
static int CountChildren(BackendTypeMask targetMask);
437445
static void LaunchMissingBackgroundProcesses(void);
438446
static void maybe_start_bgworkers(void);
447+
static bool maybe_reap_io_worker(int pid);
448+
static void maybe_adjust_io_workers(void);
439449
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
440450
static PMChild *StartChildProcess(BackendType type);
441451
static void StartSysLogger(void);
@@ -1365,6 +1375,11 @@ PostmasterMain(int argc, char *argv[])
13651375
*/
13661376
AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
13671377

1378+
UpdatePMState(PM_STARTUP);
1379+
1380+
/* Make sure we can perform I/O while starting up. */
1381+
maybe_adjust_io_workers();
1382+
13681383
/* Start bgwriter and checkpointer so they can help with recovery */
13691384
if (CheckpointerPMChild == NULL)
13701385
CheckpointerPMChild = StartChildProcess(B_CHECKPOINTER);
@@ -1377,7 +1392,6 @@ PostmasterMain(int argc, char *argv[])
13771392
StartupPMChild = StartChildProcess(B_STARTUP);
13781393
Assert(StartupPMChild != NULL);
13791394
StartupStatus = STARTUP_RUNNING;
1380-
UpdatePMState(PM_STARTUP);
13811395

13821396
/* Some workers may be scheduled to start now */
13831397
maybe_start_bgworkers();
@@ -2502,6 +2516,16 @@ process_pm_child_exit(void)
25022516
continue;
25032517
}
25042518

2519+
/* Was it an IO worker? */
2520+
if (maybe_reap_io_worker(pid))
2521+
{
2522+
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2523+
HandleChildCrash(pid, exitstatus, _("io worker"));
2524+
2525+
maybe_adjust_io_workers();
2526+
continue;
2527+
}
2528+
25052529
/*
25062530
* Was it a backend or a background worker?
25072531
*/
@@ -2723,6 +2747,7 @@ HandleFatalError(QuitSignalReason reason, bool consider_sigabrt)
27232747
case PM_WAIT_XLOG_SHUTDOWN:
27242748
case PM_WAIT_XLOG_ARCHIVAL:
27252749
case PM_WAIT_CHECKPOINTER:
2750+
case PM_WAIT_IO_WORKERS:
27262751

27272752
/*
27282753
* NB: Similar code exists in PostmasterStateMachine()'s handling
@@ -2905,20 +2930,21 @@ PostmasterStateMachine(void)
29052930

29062931
/*
29072932
* If we are doing crash recovery or an immediate shutdown then we
2908-
* expect archiver, checkpointer and walsender to exit as well,
2909-
* otherwise not.
2933+
* expect archiver, checkpointer, io workers and walsender to exit as
2934+
* well, otherwise not.
29102935
*/
29112936
if (FatalError || Shutdown >= ImmediateShutdown)
29122937
targetMask = btmask_add(targetMask,
29132938
B_CHECKPOINTER,
29142939
B_ARCHIVER,
2940+
B_IO_WORKER,
29152941
B_WAL_SENDER);
29162942

29172943
/*
2918-
* Normally walsenders and archiver will continue running; they will
2919-
* be terminated later after writing the checkpoint record. We also
2920-
* let dead-end children to keep running for now. The syslogger
2921-
* process exits last.
2944+
* Normally archiver, checkpointer, IO workers and walsenders will
2945+
* continue running; they will be terminated later after writing the
2946+
* checkpoint record. We also let dead-end children keep running
2947+
* for now. The syslogger process exits last.
29222948
*
29232949
* This assertion checks that we have covered all backend types,
29242950
* either by including them in targetMask, or by noting here that they
@@ -2933,12 +2959,13 @@ PostmasterStateMachine(void)
29332959
B_LOGGER);
29342960

29352961
/*
2936-
* Archiver, checkpointer and walsender may or may not be in
2937-
* targetMask already.
2962+
* Archiver, checkpointer, IO workers, and walsender may or may
2963+
* not be in targetMask already.
29382964
*/
29392965
remainMask = btmask_add(remainMask,
29402966
B_ARCHIVER,
29412967
B_CHECKPOINTER,
2968+
B_IO_WORKER,
29422969
B_WAL_SENDER);
29432970

29442971
/* these are not real postmaster children */
@@ -3039,11 +3066,25 @@ PostmasterStateMachine(void)
30393066
{
30403067
/*
30413068
* PM_WAIT_XLOG_ARCHIVAL state ends when there are no children other
3042-
* than checkpointer, dead-end children and logger left. There
3069+
* than checkpointer, io workers, logger and dead-end children left. There
30433070
* shouldn't be any regular backends left by now anyway; what we're
30443071
* really waiting for is for walsenders and archiver to exit.
30453072
*/
3046-
if (CountChildren(btmask_all_except(B_CHECKPOINTER, B_LOGGER, B_DEAD_END_BACKEND)) == 0)
3073+
if (CountChildren(btmask_all_except(B_CHECKPOINTER, B_IO_WORKER,
3074+
B_LOGGER, B_DEAD_END_BACKEND)) == 0)
3075+
{
3076+
UpdatePMState(PM_WAIT_IO_WORKERS);
3077+
SignalChildren(SIGUSR2, btmask(B_IO_WORKER));
3078+
}
3079+
}
3080+
3081+
if (pmState == PM_WAIT_IO_WORKERS)
3082+
{
3083+
/*
3084+
* PM_WAIT_IO_WORKERS state ends when there's only checkpointer and
3085+
* dead_end children left.
3086+
*/
3087+
if (io_worker_count == 0)
30473088
{
30483089
UpdatePMState(PM_WAIT_CHECKPOINTER);
30493090

@@ -3171,10 +3212,14 @@ PostmasterStateMachine(void)
31713212
/* re-create shared memory and semaphores */
31723213
CreateSharedMemoryAndSemaphores();
31733214

3215+
UpdatePMState(PM_STARTUP);
3216+
3217+
/* Make sure we can perform I/O while starting up. */
3218+
maybe_adjust_io_workers();
3219+
31743220
StartupPMChild = StartChildProcess(B_STARTUP);
31753221
Assert(StartupPMChild != NULL);
31763222
StartupStatus = STARTUP_RUNNING;
3177-
UpdatePMState(PM_STARTUP);
31783223
/* crash recovery started, reset SIGKILL flag */
31793224
AbortStartTime = 0;
31803225

@@ -3198,6 +3243,7 @@ pmstate_name(PMState state)
31983243
PM_TOSTR_CASE(PM_WAIT_BACKENDS);
31993244
PM_TOSTR_CASE(PM_WAIT_XLOG_SHUTDOWN);
32003245
PM_TOSTR_CASE(PM_WAIT_XLOG_ARCHIVAL);
3246+
PM_TOSTR_CASE(PM_WAIT_IO_WORKERS);
32013247
PM_TOSTR_CASE(PM_WAIT_DEAD_END);
32023248
PM_TOSTR_CASE(PM_WAIT_CHECKPOINTER);
32033249
PM_TOSTR_CASE(PM_NO_CHILDREN);
@@ -3235,6 +3281,16 @@ LaunchMissingBackgroundProcesses(void)
32353281
if (SysLoggerPMChild == NULL && Logging_collector)
32363282
StartSysLogger();
32373283

3284+
/*
3285+
* The number of configured workers might have changed, or a prior start
3286+
* of a worker might have failed. Check if we need to start/stop any
3287+
* workers.
3288+
*
3289+
* A config file change will always lead to this function being called, so
3290+
* we always will process the config change in a timely manner.
3291+
*/
3292+
maybe_adjust_io_workers();
3293+
32383294
/*
32393295
* The checkpointer and the background writer are active from the start,
32403296
* until shutdown is initiated.
@@ -4120,6 +4176,7 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
41204176
case PM_WAIT_DEAD_END:
41214177
case PM_WAIT_XLOG_ARCHIVAL:
41224178
case PM_WAIT_XLOG_SHUTDOWN:
4179+
case PM_WAIT_IO_WORKERS:
41234180
case PM_WAIT_BACKENDS:
41244181
case PM_STOP_BACKENDS:
41254182
break;
@@ -4270,6 +4327,99 @@ maybe_start_bgworkers(void)
42704327
}
42714328
}
42724329

4330+
static bool
4331+
maybe_reap_io_worker(int pid)
4332+
{
4333+
for (int id = 0; id < MAX_IO_WORKERS; ++id)
4334+
{
4335+
if (io_worker_children[id] &&
4336+
io_worker_children[id]->pid == pid)
4337+
{
4338+
ReleasePostmasterChildSlot(io_worker_children[id]);
4339+
4340+
--io_worker_count;
4341+
io_worker_children[id] = NULL;
4342+
return true;
4343+
}
4344+
}
4345+
return false;
4346+
}
4347+
4348+
/*
4349+
* Start or stop IO workers, to close the gap between the number of running
4350+
* workers and the number of configured workers. Used to respond to change of
4351+
* the io_workers GUC (by increasing and decreasing the number of workers), as
4352+
* well as workers terminating in response to errors (by starting
4353+
* "replacement" workers).
4354+
*/
4355+
static void
4356+
maybe_adjust_io_workers(void)
4357+
{
4358+
if (!pgaio_workers_enabled())
4359+
return;
4360+
4361+
/*
4362+
* If we're in final shutting down state, then we're just waiting for all
4363+
* processes to exit.
4364+
*/
4365+
if (pmState >= PM_WAIT_IO_WORKERS)
4366+
return;
4367+
4368+
/* Don't start new workers during an immediate shutdown either. */
4369+
if (Shutdown >= ImmediateShutdown)
4370+
return;
4371+
4372+
/*
4373+
* Don't start new workers if we're in the shutdown phase of a crash
4374+
* restart. But we *do* need to start if we're already starting up again.
4375+
*/
4376+
if (FatalError && pmState >= PM_STOP_BACKENDS)
4377+
return;
4378+
4379+
Assert(pmState < PM_WAIT_IO_WORKERS);
4380+
4381+
/* Not enough running? */
4382+
while (io_worker_count < io_workers)
4383+
{
4384+
PMChild *child;
4385+
int id;
4386+
4387+
/* find unused entry in io_worker_children array */
4388+
for (id = 0; id < MAX_IO_WORKERS; ++id)
4389+
{
4390+
if (io_worker_children[id] == NULL)
4391+
break;
4392+
}
4393+
if (id == MAX_IO_WORKERS)
4394+
elog(ERROR, "could not find a free IO worker ID");
4395+
4396+
/* Try to launch one. */
4397+
child = StartChildProcess(B_IO_WORKER);
4398+
if (child != NULL)
4399+
{
4400+
io_worker_children[id] = child;
4401+
++io_worker_count;
4402+
}
4403+
else
4404+
break; /* XXX try again soon? */
4405+
}
4406+
4407+
/* Too many running? */
4408+
if (io_worker_count > io_workers)
4409+
{
4410+
/* ask the IO worker in the highest slot to exit */
4411+
for (int id = MAX_IO_WORKERS - 1; id >= 0; --id)
4412+
{
4413+
if (io_worker_children[id] != NULL)
4414+
{
4415+
kill(io_worker_children[id]->pid, SIGUSR2);
4416+
break;
4417+
}
4418+
}
4419+
}
4420+
}
4421+
4422+
42734423
/*
42744424
* When a backend asks to be notified about worker state changes, we
42754425
* set a flag in its backend entry. The background worker machinery needs

src/backend/storage/aio/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ OBJS = \
1515
aio_io.o \
1616
aio_target.o \
1717
method_sync.o \
18+
method_worker.o \
1819
read_stream.o
1920

2021
include $(top_srcdir)/src/backend/common.mk

src/backend/storage/aio/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@ backend_sources += files(
77
'aio_io.c',
88
'aio_target.c',
99
'method_sync.c',
10+
'method_worker.c',
1011
'read_stream.c',
1112
)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy