Skip to content

Commit 222e11a

Browse files
committed
Use incremental parsing of backup manifests.
This changes the three callers to json_parse_manifest() to use json_parse_manifest_incremental_chunk() if appropriate. In the case of the backend caller, since we don't know the size of the manifest in advance we always call the incremental parser. Author: Andrew Dunstan Reviewed-By: Jacob Champion Discussion: https://postgr.es/m/7b0a51d6-0d9d-7366-3a1a-f74397a02f55@dunslane.net
1 parent ea7b4e9 commit 222e11a

File tree

3 files changed

+178
-62
lines changed

3 files changed

+178
-62
lines changed

src/backend/backup/basebackup_incremental.c

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@
3333

3434
#define BLOCKS_PER_READ 512
3535

36+
/*
37+
* We expect to find the last lines of the manifest, including the checksum,
38+
* in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
39+
* parse step if we are about to overflow MAX_CHUNK bytes.
40+
*/
41+
#define MIN_CHUNK 1024
42+
#define MAX_CHUNK (128 * 1024)
43+
3644
/*
3745
* Details extracted from the WAL ranges present in the supplied backup manifest.
3846
*/
@@ -112,6 +120,11 @@ struct IncrementalBackupInfo
112120
* turns out to be a problem in practice, we'll need to be more clever.
113121
*/
114122
BlockRefTable *brtab;
123+
124+
/*
125+
* State object for incremental JSON parsing
126+
*/
127+
JsonManifestParseIncrementalState *inc_state;
115128
};
116129

117130
static void manifest_process_version(JsonManifestParseContext *context,
@@ -142,6 +155,7 @@ CreateIncrementalBackupInfo(MemoryContext mcxt)
142155
{
143156
IncrementalBackupInfo *ib;
144157
MemoryContext oldcontext;
158+
JsonManifestParseContext *context;
145159

146160
oldcontext = MemoryContextSwitchTo(mcxt);
147161

@@ -157,6 +171,17 @@ CreateIncrementalBackupInfo(MemoryContext mcxt)
157171
*/
158172
ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
159173

174+
context = palloc0(sizeof(JsonManifestParseContext));
175+
/* Parse the manifest. */
176+
context->private_data = ib;
177+
context->version_cb = manifest_process_version;
178+
context->system_identifier_cb = manifest_process_system_identifier;
179+
context->per_file_cb = manifest_process_file;
180+
context->per_wal_range_cb = manifest_process_wal_range;
181+
context->error_cb = manifest_report_error;
182+
183+
ib->inc_state = json_parse_manifest_incremental_init(context);
184+
160185
MemoryContextSwitchTo(oldcontext);
161186

162187
return ib;
@@ -176,13 +201,20 @@ AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
176201
/* Switch to our memory context. */
177202
oldcontext = MemoryContextSwitchTo(ib->mcxt);
178203

179-
/*
180-
* XXX. Our json parser is at present incapable of parsing json blobs
181-
* incrementally, so we have to accumulate the entire backup manifest
182-
* before we can do anything with it. This should really be fixed, since
183-
* some users might have very large numbers of files in the data
184-
* directory.
185-
*/
204+
if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
205+
{
206+
/*
207+
* Time for an incremental parse. We'll do all but the last MIN_CHUNK
208+
* so that we have enough left for the final piece.
209+
*/
210+
json_parse_manifest_incremental_chunk(
211+
ib->inc_state, ib->buf.data, ib->buf.len - MIN_CHUNK, false);
212+
/* now remove what we just parsed */
213+
memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
214+
MIN_CHUNK + 1);
215+
ib->buf.len = MIN_CHUNK;
216+
}
217+
186218
appendBinaryStringInfo(&ib->buf, data, len);
187219

188220
/* Switch back to previous memory context. */
@@ -196,20 +228,14 @@ AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
196228
void
197229
FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
198230
{
199-
JsonManifestParseContext context;
200231
MemoryContext oldcontext;
201232

202233
/* Switch to our memory context. */
203234
oldcontext = MemoryContextSwitchTo(ib->mcxt);
204235

205-
/* Parse the manifest. */
206-
context.private_data = ib;
207-
context.version_cb = manifest_process_version;
208-
context.system_identifier_cb = manifest_process_system_identifier;
209-
context.per_file_cb = manifest_process_file;
210-
context.per_wal_range_cb = manifest_process_wal_range;
211-
context.error_cb = manifest_report_error;
212-
json_parse_manifest(&context, ib->buf.data, ib->buf.len);
236+
/* Parse the last chunk of the manifest */
237+
json_parse_manifest_incremental_chunk(
238+
ib->inc_state, ib->buf.data, ib->buf.len, true);
213239

214240
/* Done with the buffer, so release memory. */
215241
pfree(ib->buf.data);

src/bin/pg_combinebackup/load_manifest.c

Lines changed: 70 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@
3434
*/
3535
#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
3636

37+
/*
38+
* Size of the JSON chunk to be read in.
39+
*
40+
*/
41+
#define READ_CHUNK_SIZE (128 * 1024)
42+
3743
/*
3844
* Define a hash table which we can use to store information about the files
3945
* mentioned in the backup manifest.
@@ -109,6 +115,7 @@ load_backup_manifest(char *backup_directory)
109115
int rc;
110116
JsonManifestParseContext context;
111117
manifest_data *result;
118+
int chunk_size = READ_CHUNK_SIZE;
112119

113120
/* Open the manifest file. */
114121
snprintf(pathname, MAXPGPATH, "%s/backup_manifest", backup_directory);
@@ -133,27 +140,6 @@ load_backup_manifest(char *backup_directory)
133140
/* Create the hash table. */
134141
ht = manifest_files_create(initial_size, NULL);
135142

136-
/*
137-
* Slurp in the whole file.
138-
*
139-
* This is not ideal, but there's currently no way to get pg_parse_json()
140-
* to perform incremental parsing.
141-
*/
142-
buffer = pg_malloc(statbuf.st_size);
143-
rc = read(fd, buffer, statbuf.st_size);
144-
if (rc != statbuf.st_size)
145-
{
146-
if (rc < 0)
147-
pg_fatal("could not read file \"%s\": %m", pathname);
148-
else
149-
pg_fatal("could not read file \"%s\": read %d of %lld",
150-
pathname, rc, (long long int) statbuf.st_size);
151-
}
152-
153-
/* Close the manifest file. */
154-
close(fd);
155-
156-
/* Parse the manifest. */
157143
result = pg_malloc0(sizeof(manifest_data));
158144
result->files = ht;
159145
context.private_data = result;
@@ -162,7 +148,69 @@ load_backup_manifest(char *backup_directory)
162148
context.per_file_cb = combinebackup_per_file_cb;
163149
context.per_wal_range_cb = combinebackup_per_wal_range_cb;
164150
context.error_cb = report_manifest_error;
165-
json_parse_manifest(&context, buffer, statbuf.st_size);
151+
152+
/*
153+
* Parse the file, in chunks if necessary.
154+
*/
155+
if (statbuf.st_size <= chunk_size)
156+
{
157+
buffer = pg_malloc(statbuf.st_size);
158+
rc = read(fd, buffer, statbuf.st_size);
159+
if (rc != statbuf.st_size)
160+
{
161+
if (rc < 0)
162+
pg_fatal("could not read file \"%s\": %m", pathname);
163+
else
164+
pg_fatal("could not read file \"%s\": read %d of %lld",
165+
pathname, rc, (long long int) statbuf.st_size);
166+
}
167+
168+
/* Close the manifest file. */
169+
close(fd);
170+
171+
/* Parse the manifest. */
172+
json_parse_manifest(&context, buffer, statbuf.st_size);
173+
}
174+
else
175+
{
176+
int bytes_left = statbuf.st_size;
177+
JsonManifestParseIncrementalState *inc_state;
178+
179+
inc_state = json_parse_manifest_incremental_init(&context);
180+
181+
buffer = pg_malloc(chunk_size + 1);
182+
183+
while (bytes_left > 0)
184+
{
185+
int bytes_to_read = chunk_size;
186+
187+
/*
188+
* Make sure that the last chunk is sufficiently large. (i.e. at
189+
* least half the chunk size) so that it will contain fully the
190+
* piece at the end with the checksum.
191+
*/
192+
if (bytes_left < chunk_size)
193+
bytes_to_read = bytes_left;
194+
else if (bytes_left < 2 * chunk_size)
195+
bytes_to_read = bytes_left / 2;
196+
rc = read(fd, buffer, bytes_to_read);
197+
if (rc != bytes_to_read)
198+
{
199+
if (rc < 0)
200+
pg_fatal("could not read file \"%s\": %m", pathname);
201+
else
202+
pg_fatal("could not read file \"%s\": read %lld of %lld",
203+
pathname,
204+
(long long int) (statbuf.st_size + rc - bytes_left),
205+
(long long int) statbuf.st_size);
206+
}
207+
bytes_left -= rc;
208+
json_parse_manifest_incremental_chunk(
209+
inc_state, buffer, rc, bytes_left == 0);
210+
}
211+
212+
close(fd);
213+
}
166214

167215
/* All done. */
168216
pfree(buffer);

src/bin/pg_verifybackup/pg_verifybackup.c

Lines changed: 66 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
/*
4444
* How many bytes should we try to read from a file at once?
4545
*/
46-
#define READ_CHUNK_SIZE 4096
46+
#define READ_CHUNK_SIZE (128 * 1024)
4747

4848
/*
4949
* Each file described by the manifest file is parsed to produce an object
@@ -399,6 +399,8 @@ parse_manifest_file(char *manifest_path)
399399
JsonManifestParseContext context;
400400
manifest_data *result;
401401

402+
int chunk_size = READ_CHUNK_SIZE;
403+
402404
/* Open the manifest file. */
403405
if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
404406
report_fatal_error("could not open file \"%s\": %m", manifest_path);
@@ -414,28 +416,6 @@ parse_manifest_file(char *manifest_path)
414416
/* Create the hash table. */
415417
ht = manifest_files_create(initial_size, NULL);
416418

417-
/*
418-
* Slurp in the whole file.
419-
*
420-
* This is not ideal, but there's currently no easy way to get
421-
* pg_parse_json() to perform incremental parsing.
422-
*/
423-
buffer = pg_malloc(statbuf.st_size);
424-
rc = read(fd, buffer, statbuf.st_size);
425-
if (rc != statbuf.st_size)
426-
{
427-
if (rc < 0)
428-
report_fatal_error("could not read file \"%s\": %m",
429-
manifest_path);
430-
else
431-
report_fatal_error("could not read file \"%s\": read %d of %lld",
432-
manifest_path, rc, (long long int) statbuf.st_size);
433-
}
434-
435-
/* Close the manifest file. */
436-
close(fd);
437-
438-
/* Parse the manifest. */
439419
result = pg_malloc0(sizeof(manifest_data));
440420
result->files = ht;
441421
context.private_data = result;
@@ -444,7 +424,69 @@ parse_manifest_file(char *manifest_path)
444424
context.per_file_cb = verifybackup_per_file_cb;
445425
context.per_wal_range_cb = verifybackup_per_wal_range_cb;
446426
context.error_cb = report_manifest_error;
447-
json_parse_manifest(&context, buffer, statbuf.st_size);
427+
428+
/*
429+
* Parse the file, in chunks if necessary.
430+
*/
431+
if (statbuf.st_size <= chunk_size)
432+
{
433+
buffer = pg_malloc(statbuf.st_size);
434+
rc = read(fd, buffer, statbuf.st_size);
435+
if (rc != statbuf.st_size)
436+
{
437+
if (rc < 0)
438+
pg_fatal("could not read file \"%s\": %m", manifest_path);
439+
else
440+
pg_fatal("could not read file \"%s\": read %d of %lld",
441+
manifest_path, rc, (long long int) statbuf.st_size);
442+
}
443+
444+
/* Close the manifest file. */
445+
close(fd);
446+
447+
/* Parse the manifest. */
448+
json_parse_manifest(&context, buffer, statbuf.st_size);
449+
}
450+
else
451+
{
452+
int bytes_left = statbuf.st_size;
453+
JsonManifestParseIncrementalState *inc_state;
454+
455+
inc_state = json_parse_manifest_incremental_init(&context);
456+
457+
buffer = pg_malloc(chunk_size + 1);
458+
459+
while (bytes_left > 0)
460+
{
461+
int bytes_to_read = chunk_size;
462+
463+
/*
464+
* Make sure that the last chunk is sufficiently large. (i.e. at
465+
* least half the chunk size) so that it will contain fully the
466+
* piece at the end with the checksum.
467+
*/
468+
if (bytes_left < chunk_size)
469+
bytes_to_read = bytes_left;
470+
else if (bytes_left < 2 * chunk_size)
471+
bytes_to_read = bytes_left / 2;
472+
rc = read(fd, buffer, bytes_to_read);
473+
if (rc != bytes_to_read)
474+
{
475+
if (rc < 0)
476+
pg_fatal("could not read file \"%s\": %m", manifest_path);
477+
else
478+
pg_fatal("could not read file \"%s\": read %lld of %lld",
479+
manifest_path,
480+
(long long int) (statbuf.st_size + rc - bytes_left),
481+
(long long int) statbuf.st_size);
482+
}
483+
bytes_left -= rc;
484+
json_parse_manifest_incremental_chunk(
485+
inc_state, buffer, rc, bytes_left == 0);
486+
}
487+
488+
close(fd);
489+
}
448490

449491
/* Done with the buffer. */
450492
pfree(buffer);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy