Skip to content

Commit 9a740f8

Browse files
committed
Refactor code in charge of running shell-based recovery commands
The code specific to the execution of archive_cleanup_command, recovery_end_command and restore_command is moved to a new file named shell_restore.c. The code is split into three functions: - shell_restore(), which attempts the execution of a shell-based restore_command. - shell_archive_cleanup(), for archive_cleanup_command. - shell_recovery_end(), for recovery_end_command. This introduces no functional changes: the failure patterns and the logs they generate are the same as before (one case actually generates one less DEBUG2 message "could not restore" when a restore command succeeds but the follow-up stat() to check the size fails, but that only matters with an elevel high enough). This is preparatory work for allowing recovery modules, a facility similar to archive modules, with callbacks shaped similarly to the functions introduced here. Author: Nathan Bossart Reviewed-by: Andres Freund, Michael Paquier Discussion: https://postgr.es/m/20221227192449.GA3672473@nathanxps13
1 parent 02d3448 commit 9a740f8

File tree

6 files changed

+214
-127
lines changed

6 files changed

+214
-127
lines changed

src/backend/access/transam/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ OBJS = \
1919
multixact.o \
2020
parallel.o \
2121
rmgr.o \
22+
shell_restore.o \
2223
slru.o \
2324
subtrans.o \
2425
timeline.o \

src/backend/access/transam/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ backend_sources += files(
77
'multixact.c',
88
'parallel.c',
99
'rmgr.c',
10+
'shell_restore.c',
1011
'slru.c',
1112
'subtrans.c',
1213
'timeline.c',
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* shell_restore.c
4+
* Recovery functions for a user-specified shell command.
5+
*
6+
* These recovery functions use a user-specified shell command (e.g. based
7+
* on the GUC restore_command).
8+
*
9+
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
10+
* Portions Copyright (c) 1994, Regents of the University of California
11+
*
12+
* src/backend/access/transam/shell_restore.c
13+
*
14+
*-------------------------------------------------------------------------
15+
*/
16+
17+
#include "postgres.h"
18+
19+
#include <signal.h>
20+
21+
#include "access/xlogarchive.h"
22+
#include "access/xlogrecovery.h"
23+
#include "common/archive.h"
24+
#include "common/percentrepl.h"
25+
#include "storage/ipc.h"
26+
#include "utils/wait_event.h"
27+
28+
/* Forward declaration of the helper shared by shell_archive_cleanup() and shell_recovery_end(). */
static void ExecuteRecoveryCommand(const char *command,
29+
const char *commandName,
30+
bool failOnSignal,
31+
uint32 wait_event_info,
32+
const char *lastRestartPointFileName);
33+
34+
/*
35+
* Attempt to execute a shell-based restore command.
36+
*
* The command line is produced by BuildRestoreCommand() from
* recoveryRestoreCommand together with 'path', 'file' and
* 'lastRestartPointFileName'. 'file' is also the name quoted in the
* failure message.
*
37+
* Returns true if the command has succeeded, false otherwise.
*
* Not every failure returns to the caller: a child killed by SIGTERM leads
* to proc_exit(1), and a result for which wait_result_is_any_signal() is
* true is reported at FATAL instead of DEBUG2.
38+
*/
39+
bool
40+
shell_restore(const char *file, const char *path,
41+
const char *lastRestartPointFileName)
42+
{
43+
char *cmd;
44+
int rc;
45+
46+
/* Build the restore command to execute */
47+
cmd = BuildRestoreCommand(recoveryRestoreCommand, path, file,
48+
lastRestartPointFileName);
49+
50+
ereport(DEBUG3,
51+
(errmsg_internal("executing restore command \"%s\"", cmd)));
52+
53+
/*
54+
* Copy xlog from archival storage to XLOGDIR
*
* All open stdio output streams are flushed before the child is spawned,
* and the system() call is bracketed by wait-event reporting under
* WAIT_EVENT_RESTORE_COMMAND.
55+
*/
56+
fflush(NULL);
57+
pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
58+
rc = system(cmd);
59+
pgstat_report_wait_end();
60+
61+
/* The command string is not referenced again after system() returns. */
pfree(cmd);
62+
63+
/*
64+
* Remember, we rollforward UNTIL the restore fails so failure here is
65+
* just part of the process... that makes it difficult to determine
66+
* whether the restore failed because there isn't an archive to restore,
67+
* or because the administrator has specified the restore program
68+
* incorrectly. We have to assume the former.
69+
*
70+
* However, if the failure was due to any sort of signal, it's best to
71+
* punt and abort recovery. (If we "return false" here, upper levels will
72+
* assume that recovery is complete and start up the database!) It's
73+
* essential to abort on child SIGINT and SIGQUIT, because per spec
74+
* system() ignores SIGINT and SIGQUIT while waiting; if we see one of
75+
* those it's a good bet we should have gotten it too.
76+
*
77+
* On SIGTERM, assume we have received a fast shutdown request, and exit
78+
* cleanly. It's pure chance whether we receive the SIGTERM first, or the
79+
* child process. If we receive it first, the signal handler will call
80+
* proc_exit, otherwise we do it here. If we or the child process received
81+
* SIGTERM for any other reason than a fast shutdown request, postmaster
82+
* will perform an immediate shutdown when it sees us exiting
83+
* unexpectedly.
84+
*
85+
* We treat hard shell errors such as "command not found" as fatal, too.
86+
*/
87+
if (rc != 0)
88+
{
89+
/* The SIGTERM check comes first so that case exits without logging. */
if (wait_result_is_signal(rc, SIGTERM))
90+
proc_exit(1);
91+
92+
/* Severity is chosen per result: FATAL for signal-like results, else DEBUG2. */
ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
93+
(errmsg("could not restore file \"%s\" from archive: %s",
94+
file, wait_result_to_str(rc))));
95+
}
96+
97+
/*
* Reached on success, or after a failure logged at DEBUG2 (presumably the
* FATAL report above does not return -- confirm against ereport()).
*/
return (rc == 0);
98+
}
99+
100+
/*
101+
* Attempt to execute a shell-based archive cleanup command.
*
* Runs archiveCleanupCommand through ExecuteRecoveryCommand() with
* failOnSignal set to false, so a failing command is only ever reported as
* a WARNING. 'lastRestartPointFileName' is forwarded unchanged.
102+
*/
103+
void
104+
shell_archive_cleanup(const char *lastRestartPointFileName)
105+
{
106+
ExecuteRecoveryCommand(archiveCleanupCommand, "archive_cleanup_command",
107+
false, WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND,
108+
lastRestartPointFileName);
109+
}
110+
111+
/*
112+
* Attempt to execute a shell-based end-of-recovery command.
*
* Runs recoveryEndCommand through ExecuteRecoveryCommand() with
* failOnSignal set to true, so a result for which
* wait_result_is_any_signal() is true is reported at FATAL; any other
* failure is a WARNING. 'lastRestartPointFileName' is forwarded unchanged.
113+
*/
114+
void
115+
shell_recovery_end(const char *lastRestartPointFileName)
116+
{
117+
ExecuteRecoveryCommand(recoveryEndCommand, "recovery_end_command", true,
118+
WAIT_EVENT_RECOVERY_END_COMMAND,
119+
lastRestartPointFileName);
120+
}
121+
122+
/*
123+
* Attempt to execute an external shell command during recovery.
124+
*
125+
* 'command' is the shell command to be executed, 'commandName' is a
126+
* human-readable name describing the command emitted in the logs. If
127+
* 'failOnSignal' is true and the command is killed by a signal, a FATAL
128+
* error is thrown. Otherwise any non-zero result is reported as a WARNING;
* nothing is reported when the command returns zero.
*
* 'wait_event_info' is the wait event reported while the command runs.
* 'lastRestartPointFileName' is handed to replace_percent_placeholders()
* as the value for the "r" placeholder (presumably "%r" in the command
* string -- confirm against that function).
129+
*
130+
* This is currently used for recovery_end_command and archive_cleanup_command.
131+
*/
132+
static void
133+
ExecuteRecoveryCommand(const char *command, const char *commandName,
134+
bool failOnSignal, uint32 wait_event_info,
135+
const char *lastRestartPointFileName)
136+
{
137+
char *xlogRecoveryCmd;
138+
int rc;
139+
140+
Assert(command && commandName);
141+
142+
/*
143+
* construct the command to be executed
144+
*/
145+
xlogRecoveryCmd = replace_percent_placeholders(command, commandName, "r",
146+
lastRestartPointFileName);
147+
148+
/*
* NOTE(review): this message, like the failure report below, prints the
* unexpanded 'command' rather than the expanded xlogRecoveryCmd that is
* actually executed -- confirm that this is intended.
*/
ereport(DEBUG3,
149+
(errmsg_internal("executing %s \"%s\"", commandName, command)));
150+
151+
/*
152+
* execute the constructed command
*
* All open stdio output streams are flushed first, and the system() call
* is bracketed by wait-event reporting under 'wait_event_info'.
153+
*/
154+
fflush(NULL);
155+
pgstat_report_wait_start(wait_event_info);
156+
rc = system(xlogRecoveryCmd);
157+
pgstat_report_wait_end();
158+
159+
/* The expanded command string is not referenced again past this point. */
pfree(xlogRecoveryCmd);
160+
161+
if (rc != 0)
162+
{
163+
/*
164+
* If the failure was due to any sort of signal, it's best to punt and
165+
* abort recovery. See comments in shell_restore().
*
* The escalation to FATAL only applies when the caller asked for it
* via 'failOnSignal'; otherwise the report stays at WARNING.
166+
*/
167+
ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
168+
/*------
169+
translator: First %s represents a postgresql.conf parameter name like
170+
"recovery_end_command", the 2nd is the value of that parameter, the
171+
third an already translated error message. */
172+
(errmsg("%s \"%s\": %s", commandName,
173+
command, wait_result_to_str(rc))));
174+
}
175+
}

src/backend/access/transam/xlog.c

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ static char *GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli);
692692
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
693693
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
694694
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
695+
static void GetOldestRestartPointFileName(char *fname);
695696

696697
static void WALInsertLockAcquire(void);
697698
static void WALInsertLockAcquireExclusive(void);
@@ -4887,10 +4888,12 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
48874888
* Execute the recovery_end_command, if any.
48884889
*/
48894890
if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
4890-
ExecuteRecoveryCommand(recoveryEndCommand,
4891-
"recovery_end_command",
4892-
true,
4893-
WAIT_EVENT_RECOVERY_END_COMMAND);
4891+
{
4892+
char lastRestartPointFname[MAXFNAMELEN];
4893+
4894+
GetOldestRestartPointFileName(lastRestartPointFname);
4895+
shell_recovery_end(lastRestartPointFname);
4896+
}
48944897

48954898
/*
48964899
* We switched to a new timeline. Clean up segments on the old timeline.
@@ -7307,10 +7310,12 @@ CreateRestartPoint(int flags)
73077310
* Finally, execute archive_cleanup_command, if any.
73087311
*/
73097312
if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7310-
ExecuteRecoveryCommand(archiveCleanupCommand,
7311-
"archive_cleanup_command",
7312-
false,
7313-
WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7313+
{
7314+
char lastRestartPointFname[MAXFNAMELEN];
7315+
7316+
GetOldestRestartPointFileName(lastRestartPointFname);
7317+
shell_archive_cleanup(lastRestartPointFname);
7318+
}
73147319

73157320
return true;
73167321
}
@@ -8884,6 +8889,22 @@ GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
88848889
LWLockRelease(ControlFileLock);
88858890
}
88868891

8892+
/*
8893+
* Returns the WAL file name for the last checkpoint or restartpoint. This is
8894+
* the oldest WAL file that we still need if we have to restart recovery.
*
* The name is written into 'fname'; the function itself returns nothing.
* The callers in this file pass a char buffer of MAXFNAMELEN bytes.
8895+
*/
8896+
static void
8897+
GetOldestRestartPointFileName(char *fname)
8898+
{
8899+
XLogRecPtr restartRedoPtr;
8900+
TimeLineID restartTli;
8901+
XLogSegNo restartSegNo;
8902+
8903+
/* Fetch the redo pointer and timeline through GetOldestRestartPoint()'s out parameters. */
GetOldestRestartPoint(&restartRedoPtr, &restartTli);
8904+
/* Derive the segment number from the redo pointer for the configured wal_segment_size. */
XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
8905+
/* Format the file name for that timeline and segment into the caller's buffer. */
XLogFileName(fname, restartTli, restartSegNo, wal_segment_size);
8906+
}
8907+
88878908
/* Thin wrapper around ShutdownWalRcv(). */
88888909
void
88898910
XLogShutdownWalRcv(void)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy