Skip to content

Commit 079ac29

Browse files
committed
Move the server's backup manifest code to a separate file.
basebackup.c is already a pretty big and complicated file, so it makes more sense to keep the backup manifest support routines in a separate file, for clarity and ease of maintenance. Discussion: http://postgr.es/m/CA+TgmoavRak5OdP76P8eJExDYhPEKWjMb0sxW7dF01dWFgE=uA@mail.gmail.com
1 parent 1e324cb commit 079ac29

File tree

4 files changed

+429
-389
lines changed

4 files changed

+429
-389
lines changed

src/backend/replication/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ include $(top_builddir)/src/Makefile.global
1515
override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
1616

1717
OBJS = \
18+
backup_manifest.o \
1819
basebackup.o \
1920
repl_gram.o \
2021
slot.o \
Lines changed: 375 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,375 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* backup_manifest.c
4+
* code for generating and sending a backup manifest
5+
*
6+
* Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
7+
*
8+
* IDENTIFICATION
9+
* src/backend/replication/backup_manifest.c
10+
*
11+
*-------------------------------------------------------------------------
12+
*/
13+
#include "postgres.h"
14+
15+
#include "access/timeline.h"
16+
#include "libpq/libpq.h"
17+
#include "libpq/pqformat.h"
18+
#include "mb/pg_wchar.h"
19+
#include "replication/backup_manifest.h"
20+
#include "utils/builtins.h"
21+
#include "utils/json.h"
22+
23+
/*
24+
* Does the user want a backup manifest?
25+
*
26+
* It's simplest to always have a manifest_info object, so that we don't need
27+
* checks for NULL pointers in too many places. However, if the user doesn't
28+
* want a manifest, we set manifest->buffile to NULL.
29+
*/
30+
static inline bool
31+
IsManifestEnabled(manifest_info *manifest)
32+
{
33+
return (manifest->buffile != NULL);
34+
}
35+
36+
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The variadic arguments (format string first) are formatted with psprintf,
 * the result is handed to AppendStringToManifest, and the temporary string
 * is then released with pfree.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement.  That keeps the macro safe to
 * use as the unbraced body of an if that has an else branch.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
45+
46+
/*
47+
* Initialize state so that we can construct a backup manifest.
48+
*
49+
* NB: Although the checksum type for the data files is configurable, the
50+
* checksum for the manifest itself always uses SHA-256. See comments in
51+
* SendBackupManifest.
52+
*/
53+
void
54+
InitializeManifest(manifest_info *manifest, manifest_option want_manifest,
55+
pg_checksum_type manifest_checksum_type)
56+
{
57+
if (want_manifest == MANIFEST_OPTION_NO)
58+
manifest->buffile = NULL;
59+
else
60+
manifest->buffile = BufFileCreateTemp(false);
61+
manifest->checksum_type = manifest_checksum_type;
62+
pg_sha256_init(&manifest->manifest_ctx);
63+
manifest->manifest_size = UINT64CONST(0);
64+
manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
65+
manifest->first_file = true;
66+
manifest->still_checksumming = true;
67+
68+
if (want_manifest != MANIFEST_OPTION_NO)
69+
AppendToManifest(manifest,
70+
"{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
71+
"\"Files\": [");
72+
}
73+
74+
/*
75+
* Append a cstring to the manifest.
76+
*/
77+
void
78+
AppendStringToManifest(manifest_info *manifest, char *s)
79+
{
80+
int len = strlen(s);
81+
size_t written;
82+
83+
Assert(manifest != NULL);
84+
if (manifest->still_checksumming)
85+
pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len);
86+
written = BufFileWrite(manifest->buffile, s, len);
87+
if (written != len)
88+
ereport(ERROR,
89+
(errcode_for_file_access(),
90+
errmsg("could not write to temporary file: %m")));
91+
manifest->manifest_size += len;
92+
}
93+
94+
/*
95+
* Add an entry to the backup manifest for a file.
96+
*/
97+
void
98+
AddFileToManifest(manifest_info *manifest, const char *spcoid,
99+
const char *pathname, size_t size, pg_time_t mtime,
100+
pg_checksum_context *checksum_ctx)
101+
{
102+
char pathbuf[MAXPGPATH];
103+
int pathlen;
104+
StringInfoData buf;
105+
106+
if (!IsManifestEnabled(manifest))
107+
return;
108+
109+
/*
110+
* If this file is part of a tablespace, the pathname passed to this
111+
* function will be relative to the tar file that contains it. We want the
112+
* pathname relative to the data directory (ignoring the intermediate
113+
* symlink traversal).
114+
*/
115+
if (spcoid != NULL)
116+
{
117+
snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
118+
pathname);
119+
pathname = pathbuf;
120+
}
121+
122+
/*
123+
* Each file's entry needs to be separated from any entry that follows by a
124+
* comma, but there's no comma before the first one or after the last one.
125+
* To make that work, adding a file to the manifest starts by terminating
126+
* the most recently added line, with a comma if appropriate, but does not
127+
* terminate the line inserted for this file.
128+
*/
129+
initStringInfo(&buf);
130+
if (manifest->first_file)
131+
{
132+
appendStringInfoString(&buf, "\n");
133+
manifest->first_file = false;
134+
}
135+
else
136+
appendStringInfoString(&buf, ",\n");
137+
138+
/*
139+
* Write the relative pathname to this file out to the manifest. The
140+
* manifest is always stored in UTF-8, so we have to encode paths that are
141+
* not valid in that encoding.
142+
*/
143+
pathlen = strlen(pathname);
144+
if (!manifest->force_encode &&
145+
pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
146+
{
147+
appendStringInfoString(&buf, "{ \"Path\": ");
148+
escape_json(&buf, pathname);
149+
appendStringInfoString(&buf, ", ");
150+
}
151+
else
152+
{
153+
appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
154+
enlargeStringInfo(&buf, 2 * pathlen);
155+
buf.len += hex_encode((char *) pathname, pathlen,
156+
&buf.data[buf.len]);
157+
appendStringInfoString(&buf, "\", ");
158+
}
159+
160+
appendStringInfo(&buf, "\"Size\": %zu, ", size);
161+
162+
/*
163+
* Convert last modification time to a string and append it to the
164+
* manifest. Since it's not clear what time zone to use and since time
165+
* zone definitions can change, possibly causing confusion, use GMT
166+
* always.
167+
*/
168+
appendStringInfoString(&buf, "\"Last-Modified\": \"");
169+
enlargeStringInfo(&buf, 128);
170+
buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
171+
pg_gmtime(&mtime));
172+
appendStringInfoString(&buf, "\"");
173+
174+
/* Add checksum information. */
175+
if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
176+
{
177+
uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
178+
int checksumlen;
179+
180+
checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
181+
182+
appendStringInfo(&buf,
183+
", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
184+
pg_checksum_type_name(checksum_ctx->type));
185+
enlargeStringInfo(&buf, 2 * checksumlen);
186+
buf.len += hex_encode((char *) checksumbuf, checksumlen,
187+
&buf.data[buf.len]);
188+
appendStringInfoString(&buf, "\"");
189+
}
190+
191+
/* Close out the object. */
192+
appendStringInfoString(&buf, " }");
193+
194+
/* OK, add it to the manifest. */
195+
AppendStringToManifest(manifest, buf.data);
196+
197+
/* Avoid leaking memory. */
198+
pfree(buf.data);
199+
}
200+
201+
/*
202+
* Add information about the WAL that will need to be replayed when restoring
203+
* this backup to the manifest.
204+
*/
205+
void
206+
AddWALInfoToManifest(manifest_info *manifest, XLogRecPtr startptr,
207+
TimeLineID starttli, XLogRecPtr endptr, TimeLineID endtli)
208+
{
209+
List *timelines;
210+
ListCell *lc;
211+
bool first_wal_range = true;
212+
bool found_start_timeline = false;
213+
214+
if (!IsManifestEnabled(manifest))
215+
return;
216+
217+
/* Terminate the list of files. */
218+
AppendStringToManifest(manifest, "\n],\n");
219+
220+
/* Read the timeline history for the ending timeline. */
221+
timelines = readTimeLineHistory(endtli);
222+
223+
/* Start a list of LSN ranges. */
224+
AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
225+
226+
foreach(lc, timelines)
227+
{
228+
TimeLineHistoryEntry *entry = lfirst(lc);
229+
XLogRecPtr tl_beginptr;
230+
231+
/*
232+
* We only care about timelines that were active during the backup.
233+
* Skip any that ended before the backup started. (Note that if
234+
* entry->end is InvalidXLogRecPtr, it means that the timeline has not
235+
* yet ended.)
236+
*/
237+
if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
238+
continue;
239+
240+
/*
241+
* Because the timeline history file lists newer timelines before
242+
* older ones, the first timeline we encounter that is new enough to
243+
* matter ought to match the ending timeline of the backup.
244+
*/
245+
if (first_wal_range && endtli != entry->tli)
246+
ereport(ERROR,
247+
errmsg("expected end timeline %u but found timeline %u",
248+
starttli, entry->tli));
249+
250+
if (!XLogRecPtrIsInvalid(entry->begin))
251+
tl_beginptr = entry->begin;
252+
else
253+
{
254+
tl_beginptr = startptr;
255+
256+
/*
257+
* If we reach a TLI that has no valid beginning LSN, there can't
258+
* be any more timelines in the history after this point, so we'd
259+
* better have arrived at the expected starting TLI. If not,
260+
* something's gone horribly wrong.
261+
*/
262+
if (starttli != entry->tli)
263+
ereport(ERROR,
264+
errmsg("expected start timeline %u but found timeline %u",
265+
starttli, entry->tli));
266+
}
267+
268+
AppendToManifest(manifest,
269+
"%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
270+
first_wal_range ? "" : ",\n",
271+
entry->tli,
272+
(uint32) (tl_beginptr >> 32), (uint32) tl_beginptr,
273+
(uint32) (endptr >> 32), (uint32) endptr);
274+
275+
if (starttli == entry->tli)
276+
{
277+
found_start_timeline = true;
278+
break;
279+
}
280+
281+
endptr = entry->begin;
282+
first_wal_range = false;
283+
}
284+
285+
/*
286+
* The last entry in the timeline history for the ending timeline should
287+
* be the ending timeline itself. Verify that this is what we observed.
288+
*/
289+
if (!found_start_timeline)
290+
ereport(ERROR,
291+
errmsg("start timeline %u not found history of timeline %u",
292+
starttli, endtli));
293+
294+
/* Terminate the list of WAL ranges. */
295+
AppendStringToManifest(manifest, "\n],\n");
296+
}
297+
298+
/*
299+
* Finalize the backup manifest, and send it to the client.
300+
*/
301+
void
302+
SendBackupManifest(manifest_info *manifest)
303+
{
304+
StringInfoData protobuf;
305+
uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
306+
char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
307+
size_t manifest_bytes_done = 0;
308+
309+
if (!IsManifestEnabled(manifest))
310+
return;
311+
312+
/*
313+
* Append manifest checksum, so that the problems with the manifest itself
314+
* can be detected.
315+
*
316+
* We always use SHA-256 for this, regardless of what algorithm is chosen
317+
* for checksumming the files. If we ever want to make the checksum
318+
* algorithm used for the manifest file variable, the client will need a
319+
* way to figure out which algorithm to use as close to the beginning of
320+
* the manifest file as possible, to avoid having to read the whole thing
321+
* twice.
322+
*/
323+
manifest->still_checksumming = false;
324+
pg_sha256_final(&manifest->manifest_ctx, checksumbuf);
325+
AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
326+
hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
327+
checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
328+
AppendStringToManifest(manifest, checksumstringbuf);
329+
AppendStringToManifest(manifest, "\"}\n");
330+
331+
/*
332+
* We've written all the data to the manifest file. Rewind the file so
333+
* that we can read it all back.
334+
*/
335+
if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
336+
ereport(ERROR,
337+
(errcode_for_file_access(),
338+
errmsg("could not rewind temporary file: %m")));
339+
340+
/* Send CopyOutResponse message */
341+
pq_beginmessage(&protobuf, 'H');
342+
pq_sendbyte(&protobuf, 0); /* overall format */
343+
pq_sendint16(&protobuf, 0); /* natts */
344+
pq_endmessage(&protobuf);
345+
346+
/*
347+
* Send CopyData messages.
348+
*
349+
* We choose to read back the data from the temporary file in chunks of
350+
* size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
351+
* size, so it seems to make sense to match that value here.
352+
*/
353+
while (manifest_bytes_done < manifest->manifest_size)
354+
{
355+
char manifestbuf[BLCKSZ];
356+
size_t bytes_to_read;
357+
size_t rc;
358+
359+
bytes_to_read = Min(sizeof(manifestbuf),
360+
manifest->manifest_size - manifest_bytes_done);
361+
rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
362+
if (rc != bytes_to_read)
363+
ereport(ERROR,
364+
(errcode_for_file_access(),
365+
errmsg("could not read from temporary file: %m")));
366+
pq_putmessage('d', manifestbuf, bytes_to_read);
367+
manifest_bytes_done += bytes_to_read;
368+
}
369+
370+
/* No more data, so send CopyDone message */
371+
pq_putemptymessage('c');
372+
373+
/* Release resources */
374+
BufFileClose(manifest->buffile);
375+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy