Skip to content

Commit 6d423e9

Browse files
committed
Handle interleavings between CREATE DATABASE steps and base backup.
Restoring a base backup taken in the middle of CreateDirAndVersionFile() or write_relmap_file() would lose the function's effects. The symptom was absence of the database directory, PG_VERSION file, or pg_filenode.map. If missing the directory, recovery would fail. Either missing file would not fail recovery but would render the new database unusable. Fix CreateDirAndVersionFile() with the transam/README "action first and then write a WAL entry" strategy. That has a side benefit of moving filesystem mutations out of a critical section, reducing the ways to PANIC. Fix the write_relmap_file() call with a lock acquisition, so it interacts with checkpoints like non-CREATE DATABASE calls do. Back-patch to v15, where commit 9c08aea introduced STRATEGY=WAL_LOG and made it the default. Discussion: https://postgr.es/m/20240130195003.0a.nmisch@google.com
1 parent b4fb76f commit 6d423e9

File tree

2 files changed

+32
-28
lines changed

2 files changed

+32
-28
lines changed

src/backend/commands/dbcommands.c

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -462,35 +462,12 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
462462
char buf[16];
463463

464464
/*
465-
* Prepare version data before starting a critical section.
466-
*
467-
* Note that we don't have to copy this from the source database; there's
468-
* only one legal value.
465+
* Note that we don't have to copy version data from the source database;
466+
* there's only one legal value.
469467
*/
470468
sprintf(buf, "%s\n", PG_MAJORVERSION);
471469
nbytes = strlen(PG_MAJORVERSION) + 1;
472470

473-
/* If we are not in WAL replay then write the WAL. */
474-
if (!isRedo)
475-
{
476-
xl_dbase_create_wal_log_rec xlrec;
477-
XLogRecPtr lsn;
478-
479-
START_CRIT_SECTION();
480-
481-
xlrec.db_id = dbid;
482-
xlrec.tablespace_id = tsid;
483-
484-
XLogBeginInsert();
485-
XLogRegisterData((char *) (&xlrec),
486-
sizeof(xl_dbase_create_wal_log_rec));
487-
488-
lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
489-
490-
/* As always, WAL must hit the disk before the data update does. */
491-
XLogFlush(lsn);
492-
}
493-
494471
/* Create database directory. */
495472
if (MakePGDirectory(dbpath) < 0)
496473
{
@@ -534,9 +511,24 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
534511
/* Close the version file. */
535512
CloseTransientFile(fd);
536513

537-
/* Critical section done. */
514+
/* If we are not in WAL replay then write the WAL. */
538515
if (!isRedo)
516+
{
517+
xl_dbase_create_wal_log_rec xlrec;
518+
519+
START_CRIT_SECTION();
520+
521+
xlrec.db_id = dbid;
522+
xlrec.tablespace_id = tsid;
523+
524+
XLogBeginInsert();
525+
XLogRegisterData((char *) (&xlrec),
526+
sizeof(xl_dbase_create_wal_log_rec));
527+
528+
(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
529+
539530
END_CRIT_SECTION();
531+
}
540532
}
541533

542534
/*

src/backend/utils/cache/relmapper.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,14 +303,15 @@ RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath)
303303
* Write the same data into the destination database's relmap file.
304304
*
305305
* No sinval is needed because no one can be connected to the destination
306-
* database yet. For the same reason, there is no need to acquire
307-
* RelationMappingLock.
306+
* database yet.
308307
*
309308
* There's no point in trying to preserve files here. The new database
310309
* isn't usable yet anyway, and won't ever be if we can't install a relmap
311310
* file.
312311
*/
312+
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
313313
write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath);
314+
LWLockRelease(RelationMappingLock);
314315
}
315316

316317
/*
@@ -633,10 +634,12 @@ RelationMapFinishBootstrap(void)
633634
Assert(pending_local_updates.num_mappings == 0);
634635

635636
/* Write the files; no WAL or sinval needed */
637+
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
636638
write_relmap_file(&shared_map, false, false, false,
637639
InvalidOid, GLOBALTABLESPACE_OID, "global");
638640
write_relmap_file(&local_map, false, false, false,
639641
MyDatabaseId, MyDatabaseTableSpace, DatabasePath);
642+
LWLockRelease(RelationMappingLock);
640643
}
641644

642645
/*
@@ -891,6 +894,15 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval,
891894
char mapfilename[MAXPGPATH];
892895
char maptempfilename[MAXPGPATH];
893896

897+
/*
898+
* Even without concurrent use of this map, CheckPointRelationMap() relies
899+
* on this locking. Without it, a restore of a base backup taken after
900+
* this function's XLogInsert() and before its durable_rename() would not
901+
* have the changes. wal_level=minimal doesn't need the lock, but this
902+
* isn't performance-critical enough for such a micro-optimization.
903+
*/
904+
Assert(LWLockHeldByMeInMode(RelationMappingLock, LW_EXCLUSIVE));
905+
894906
/*
895907
* Fill in the overhead fields and update CRC.
896908
*/

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy