From 37c7a7eeb6d13aac8edd7af032562233b0769e34 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 23 Jul 2025 10:29:45 -0500 Subject: [PATCH 01/67] Use PqMsg_* macros in walsender.c Oversights in commits f4b54e1ed9, dc21234005, and 228c370868. Author: Dave Cramer Discussion: https://postgr.es/m/CADK3HH%2BowWVdnbmWH4NHG8%3D%2BkXA_wjsyEVLoY719iJnb%3D%2BtT6A%40mail.gmail.com --- src/backend/replication/walsender.c | 33 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 4c72a0d43b32b..ee911394a23c6 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -65,6 +65,7 @@ #include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" +#include "libpq/protocol.h" #include "miscadmin.h" #include "nodes/replnodes.h" #include "pgstat.h" @@ -735,13 +736,13 @@ HandleUploadManifestPacket(StringInfo buf, off_t *offset, switch (mtype) { - case 'd': /* CopyData */ + case PqMsg_CopyData: maxmsglen = PQ_LARGE_MESSAGE_LIMIT; break; - case 'c': /* CopyDone */ - case 'f': /* CopyFail */ - case 'H': /* Flush */ - case 'S': /* Sync */ + case PqMsg_CopyDone: + case PqMsg_CopyFail: + case PqMsg_Flush: + case PqMsg_Sync: maxmsglen = PQ_SMALL_MESSAGE_LIMIT; break; default: @@ -763,19 +764,19 @@ HandleUploadManifestPacket(StringInfo buf, off_t *offset, /* Process the message */ switch (mtype) { - case 'd': /* CopyData */ + case PqMsg_CopyData: AppendIncrementalManifestData(ib, buf->data, buf->len); return true; - case 'c': /* CopyDone */ + case PqMsg_CopyDone: return false; - case 'H': /* Sync */ - case 'S': /* Flush */ + case PqMsg_Sync: + case PqMsg_Flush: /* Ignore these while in CopyOut mode as we do elsewhere. */ return true; - case 'f': + case PqMsg_CopyFail: ereport(ERROR, (errcode(ERRCODE_QUERY_CANCELED), errmsg("COPY from stdin failed: %s", @@ -1569,7 +1570,7 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, tmpbuf.data, sizeof(int64)); /* output previously gathered data in a CopyData packet */ - pq_putmessage_noblock('d', ctx->out->data, ctx->out->len); + pq_putmessage_noblock(PqMsg_CopyData, ctx->out->data, ctx->out->len); CHECK_FOR_INTERRUPTS(); @@ -2305,7 +2306,7 @@ ProcessRepliesIfAny(void) case PqMsg_CopyDone: if (!streamingDoneSending) { - pq_putmessage_noblock('c', NULL, 0); + pq_putmessage_noblock(PqMsg_CopyDone, NULL, 0); streamingDoneSending = true; } @@ -2758,7 +2759,7 @@ ProcessStandbyPSRequestMessage(void) pq_sendint64(&output_message, GetCurrentTimestamp()); /* ... and send it wrapped in CopyData */ - pq_putmessage_noblock('d', output_message.data, output_message.len); + pq_putmessage_noblock(PqMsg_CopyData, output_message.data, output_message.len); } /* @@ -3306,7 +3307,7 @@ XLogSendPhysical(void) wal_segment_close(xlogreader); /* Send CopyDone */ - pq_putmessage_noblock('c', NULL, 0); + pq_putmessage_noblock(PqMsg_CopyDone, NULL, 0); streamingDoneSending = true; WalSndCaughtUp = true; @@ -3434,7 +3435,7 @@ XLogSendPhysical(void) memcpy(&output_message.data[1 + sizeof(int64) + sizeof(int64)], tmpbuf.data, sizeof(int64)); - pq_putmessage_noblock('d', output_message.data, output_message.len); + pq_putmessage_noblock(PqMsg_CopyData, output_message.data, output_message.len); sentPtr = endptr; @@ -4140,7 +4141,7 @@ WalSndKeepalive(bool requestReply, XLogRecPtr writePtr) pq_sendbyte(&output_message, requestReply ? 1 : 0); /* ... 
and send it wrapped in CopyData */ - pq_putmessage_noblock('d', output_message.data, output_message.len); + pq_putmessage_noblock(PqMsg_CopyData, output_message.data, output_message.len); /* Set local flag */ if (requestReply) From 2047ad068139f0b8c6da73d0b845ca9ba30fb33d Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 23 Jul 2025 12:06:20 -0500 Subject: [PATCH 02/67] Cross-check lists of built-in LWLock tranches. lwlock.c, lwlock.h, and wait_event_names.txt each contain a list of built-in LWLock tranches. It is easy to miss one or the other when adding or removing tranches, and discrepancies have adverse effects (e.g., breaking JOINs between pg_stat_activity and pg_wait_events). This commit moves the lists of built-in tranches in lwlock.{c,h} to lwlocklist.h and adds a cross-check to the script that generates lwlocknames.h. If the lists do not match exactly, building will fail. Author: Bertrand Drouvot Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/aHpOgwuFQfcFMZ/B%40ip-10-97-1-34.eu-west-3.compute.internal --- .../storage/lmgr/generate-lwlocknames.pl | 110 +++++++++++++----- src/backend/storage/lmgr/lwlock.c | 48 +------- .../utils/activity/wait_event_names.txt | 10 +- src/include/storage/lwlock.h | 56 +++------ src/include/storage/lwlocklist.h | 57 ++++++++- 5 files changed, 162 insertions(+), 119 deletions(-) diff --git a/src/backend/storage/lmgr/generate-lwlocknames.pl b/src/backend/storage/lmgr/generate-lwlocknames.pl index c7a6720440db6..cd3e43c448aed 100644 --- a/src/backend/storage/lmgr/generate-lwlocknames.pl +++ b/src/backend/storage/lmgr/generate-lwlocknames.pl @@ -27,18 +27,24 @@ # -# First, record the predefined LWLocks listed in wait_event_names.txt. We'll -# cross-check those with the ones in lwlocklist.h. +# First, record the predefined LWLocks and built-in tranches listed in +# wait_event_names.txt. We'll cross-check those with the ones in lwlocklist.h. # +my @wait_event_tranches; my @wait_event_lwlocks; my $record_lwlocks = 0; +my $in_tranches = 0; while (<$wait_event_names>) { chomp; # Check for end marker. - last if /^# END OF PREDEFINED LWLOCKS/; + if (/^# END OF PREDEFINED LWLOCKS/) + { + $in_tranches = 1; + next; + } # Skip comments and empty lines. next if /^#/; @@ -54,13 +60,29 @@ # Go to the next line if we are not yet recording LWLocks. next if not $record_lwlocks; + # Stop recording if we reach another section. + last if /^Section:/; + # Record the LWLock. (my $waiteventname, my $waitevendocsentence) = split(/\t/, $_); - push(@wait_event_lwlocks, $waiteventname); + + if ($in_tranches) + { + push(@wait_event_tranches, $waiteventname); + } + else + { + push(@wait_event_lwlocks, $waiteventname); + } } +# +# While gathering the list of predefined LWLocks, cross-check the lists in +# lwlocklist.h with the wait events we just recorded. +# my $in_comment = 0; -my $i = 0; +my $lwlock_count = 0; +my $tranche_count = 0; while (<$lwlocklist>) { chomp; @@ -81,38 +103,72 @@ next; } - die "unable to parse lwlocklist.h line \"$_\"" - unless /^PG_LWLOCK\((\d+),\s+(\w+)\)$/; + # + # Gather list of predefined LWLocks and cross-check with the wait events. 
+ # + if (/^PG_LWLOCK\((\d+),\s+(\w+)\)$/) + { + my ($lockidx, $lockname) = ($1, $2); - (my $lockidx, my $lockname) = ($1, $2); + die "lwlocklist.h not in order" if $lockidx < $lastlockidx; + die "lwlocklist.h has duplicates" if $lockidx == $lastlockidx; - die "lwlocklist.h not in order" if $lockidx < $lastlockidx; - die "lwlocklist.h has duplicates" if $lockidx == $lastlockidx; + die "$lockname defined in lwlocklist.h but missing from " + . "wait_event_names.txt" + if $lwlock_count >= scalar @wait_event_lwlocks; + die "lists of predefined LWLocks do not match (first mismatch at " + . "$wait_event_lwlocks[$lwlock_count] in wait_event_names.txt and " + . "$lockname in lwlocklist.h)" + if $wait_event_lwlocks[$lwlock_count] ne $lockname; - die "$lockname defined in lwlocklist.h but missing from " - . "wait_event_names.txt" - if $i >= scalar @wait_event_lwlocks; - die "lists of predefined LWLocks do not match (first mismatch at " - . "$wait_event_lwlocks[$i] in wait_event_names.txt and $lockname in " - . "lwlocklist.h)" - if $wait_event_lwlocks[$i] ne $lockname; - $i++; + $lwlock_count++; - while ($lastlockidx < $lockidx - 1) + while ($lastlockidx < $lockidx - 1) + { + ++$lastlockidx; + } + $lastlockidx = $lockidx; + + # Add a "Lock" suffix to each lock name, as the C code depends on that. + printf $h "#define %-32s (&MainLWLockArray[$lockidx].lock)\n", + $lockname . "Lock"; + + next; + } + + # + # Cross-check the built-in LWLock tranches with the wait events. + # + if (/^PG_LWLOCKTRANCHE\((\w+),\s+(\w+)\)$/) { - ++$lastlockidx; + my ($tranche_id, $tranche_name) = ($1, $2); + + die "$tranche_name defined in lwlocklist.h but missing from " + . "wait_event_names.txt" + if $tranche_count >= scalar @wait_event_tranches; + die + "lists of built-in LWLock tranches do not match (first mismatch at " + . "$wait_event_tranches[$tranche_count] in wait_event_names.txt and " + . "$tranche_name in lwlocklist.h)" + if $wait_event_tranches[$tranche_count] ne $tranche_name; + + $tranche_count++; + + next; } - $lastlockidx = $lockidx; - # Add a "Lock" suffix to each lock name, as the C code depends on that - printf $h "#define %-32s (&MainLWLockArray[$lockidx].lock)\n", - $lockname . "Lock"; + die "unable to parse lwlocklist.h line \"$_\""; } die - "$wait_event_lwlocks[$i] defined in wait_event_names.txt but missing from " - . "lwlocklist.h" - if $i < scalar @wait_event_lwlocks; + "$wait_event_lwlocks[$lwlock_count] defined in wait_event_names.txt but " + . " missing from lwlocklist.h" + if $lwlock_count < scalar @wait_event_lwlocks; + +die + "$wait_event_tranches[$tranche_count] defined in wait_event_names.txt but " + . "missing from lwlocklist.h" + if $tranche_count < scalar @wait_event_tranches; print $h "\n"; printf $h "#define NUM_INDIVIDUAL_LWLOCKS %s\n", $lastlockidx + 1; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 2d43bf2cc1323..ec9c345ffdfb8 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -122,9 +122,8 @@ StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0, * own tranche. We absorb the names of these tranches from there into * BuiltinTrancheNames here. * - * 2. There are some predefined tranches for built-in groups of locks. - * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names - * appear in BuiltinTrancheNames[] below. + * 2. There are some predefined tranches for built-in groups of locks defined + * in lwlocklist.h. We absorb the names of these tranches, too. * * 3. 
Extensions can create new tranches, via either RequestNamedLWLockTranche * or LWLockRegisterTranche. The names of these that are known in the current @@ -135,49 +134,10 @@ StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0, */ static const char *const BuiltinTrancheNames[] = { #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname), +#define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname), #include "storage/lwlocklist.h" #undef PG_LWLOCK - [LWTRANCHE_XACT_BUFFER] = "XactBuffer", - [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer", - [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer", - [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer", - [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer", - [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer", - [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer", - [LWTRANCHE_WAL_INSERT] = "WALInsert", - [LWTRANCHE_BUFFER_CONTENT] = "BufferContent", - [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState", - [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO", - [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath", - [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping", - [LWTRANCHE_LOCK_MANAGER] = "LockManager", - [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager", - [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin", - [LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan", - [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA", - [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA", - [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType", - [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod", - [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore", - [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap", - [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend", - [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList", - [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA", - [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash", - [LWTRANCHE_PGSTATS_DATA] = "PgStatsData", - [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA", - [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash", - [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA", - [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash", - [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU", - [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultiXactOffsetSLRU", - [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultiXactMemberSLRU", - [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU", - [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU", - [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU", - [LWTRANCHE_XACT_SLRU] = "XactSLRU", - [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA", - [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion", +#undef PG_LWLOCKTRANCHE }; StaticAssertDecl(lengthof(BuiltinTrancheNames) == diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 4da68312b5f97..0be307d2ca04b 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -356,9 +356,13 @@ AioWorkerSubmissionQueue "Waiting to access AIO worker submission queue." # # END OF PREDEFINED LWLOCKS (DO NOT CHANGE THIS LINE) # -# Predefined LWLocks (i.e., those declared in lwlocknames.h) must be listed -# in the section above and must be listed in the same order as in -# lwlocknames.h. Other LWLocks must be listed in the section below. +# Predefined LWLocks (i.e., those declared at the top of lwlocknames.h) must be +# listed in the section above and must be listed in the same order as in +# lwlocknames.h. 
+# +# Likewise, the built-in LWLock tranches (i.e., those declared at the bottom of +# lwlocknames.h) must be listed in the section below and must be listed in the +# same order as in lwlocknames.h. # XactBuffer "Waiting for I/O on a transaction status SLRU buffer." diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 08a72569ae5fd..5e717765764f4 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -176,51 +176,23 @@ extern void LWLockInitialize(LWLock *lock, int tranche_id); * Every tranche ID less than NUM_INDIVIDUAL_LWLOCKS is reserved; also, * we reserve additional tranche IDs for builtin tranches not included in * the set of individual LWLocks. A call to LWLockNewTrancheId will never - * return a value less than LWTRANCHE_FIRST_USER_DEFINED. + * return a value less than LWTRANCHE_FIRST_USER_DEFINED. The actual list of + * built-in tranches is kept in lwlocklist.h. */ typedef enum BuiltinTrancheIds { - LWTRANCHE_XACT_BUFFER = NUM_INDIVIDUAL_LWLOCKS, - LWTRANCHE_COMMITTS_BUFFER, - LWTRANCHE_SUBTRANS_BUFFER, - LWTRANCHE_MULTIXACTOFFSET_BUFFER, - LWTRANCHE_MULTIXACTMEMBER_BUFFER, - LWTRANCHE_NOTIFY_BUFFER, - LWTRANCHE_SERIAL_BUFFER, - LWTRANCHE_WAL_INSERT, - LWTRANCHE_BUFFER_CONTENT, - LWTRANCHE_REPLICATION_ORIGIN_STATE, - LWTRANCHE_REPLICATION_SLOT_IO, - LWTRANCHE_LOCK_FASTPATH, - LWTRANCHE_BUFFER_MAPPING, - LWTRANCHE_LOCK_MANAGER, - LWTRANCHE_PREDICATE_LOCK_MANAGER, - LWTRANCHE_PARALLEL_HASH_JOIN, - LWTRANCHE_PARALLEL_BTREE_SCAN, - LWTRANCHE_PARALLEL_QUERY_DSA, - LWTRANCHE_PER_SESSION_DSA, - LWTRANCHE_PER_SESSION_RECORD_TYPE, - LWTRANCHE_PER_SESSION_RECORD_TYPMOD, - LWTRANCHE_SHARED_TUPLESTORE, - LWTRANCHE_SHARED_TIDBITMAP, - LWTRANCHE_PARALLEL_APPEND, - LWTRANCHE_PER_XACT_PREDICATE_LIST, - LWTRANCHE_PGSTATS_DSA, - LWTRANCHE_PGSTATS_HASH, - LWTRANCHE_PGSTATS_DATA, - LWTRANCHE_LAUNCHER_DSA, - LWTRANCHE_LAUNCHER_HASH, - LWTRANCHE_DSM_REGISTRY_DSA, - LWTRANCHE_DSM_REGISTRY_HASH, - LWTRANCHE_COMMITTS_SLRU, - LWTRANCHE_MULTIXACTMEMBER_SLRU, - LWTRANCHE_MULTIXACTOFFSET_SLRU, - LWTRANCHE_NOTIFY_SLRU, - LWTRANCHE_SERIAL_SLRU, - LWTRANCHE_SUBTRANS_SLRU, - LWTRANCHE_XACT_SLRU, - LWTRANCHE_PARALLEL_VACUUM_DSA, - LWTRANCHE_AIO_URING_COMPLETION, + /* + * LWTRANCHE_INVALID is an unused value that only exists to initialize the + * rest of the tranches to appropriate values. + */ + LWTRANCHE_INVALID = NUM_INDIVIDUAL_LWLOCKS - 1, + +#define PG_LWLOCK(id, name) +#define PG_LWLOCKTRANCHE(id, name) LWTRANCHE_##id, +#include "storage/lwlocklist.h" +#undef PG_LWLOCK +#undef PG_LWLOCKTRANCHE + LWTRANCHE_FIRST_USER_DEFINED, } BuiltinTrancheIds; diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h index a9681738146e1..208d2e3a8ed9e 100644 --- a/src/include/storage/lwlocklist.h +++ b/src/include/storage/lwlocklist.h @@ -2,9 +2,10 @@ * * lwlocklist.h * - * The predefined LWLock list is kept in its own source file for use by - * automatic tools. The exact representation of a keyword is determined by - * the PG_LWLOCK macro, which is not defined in this file; it can be + * The list of predefined LWLocks and built-in LWLock tranches is kept in + * its own source file for use by automatic tools. The exact + * representation of a keyword is determined by the PG_LWLOCK and + * PG_LWLOCKTRANCHE macros, which are not defined in this file; they can be * defined by the caller for special purposes. * * Also, generate-lwlocknames.pl processes this file to create lwlocknames.h. 
@@ -84,3 +85,53 @@ PG_LWLOCK(50, DSMRegistry) PG_LWLOCK(51, InjectionPoint) PG_LWLOCK(52, SerialControl) PG_LWLOCK(53, AioWorkerSubmissionQueue) + +/* + * There also exist several built-in LWLock tranches. As with the predefined + * LWLocks, be sure to update the WaitEventLWLock section of + * src/backend/utils/activity/wait_event_names.txt when modifying this list. + * + * Note that the IDs here (the first value) don't include the LWTRANCHE_ + * prefix. It's added elsewhere. + */ +PG_LWLOCKTRANCHE(XACT_BUFFER, XactBuffer) +PG_LWLOCKTRANCHE(COMMITTS_BUFFER, CommitTsBuffer) +PG_LWLOCKTRANCHE(SUBTRANS_BUFFER, SubtransBuffer) +PG_LWLOCKTRANCHE(MULTIXACTOFFSET_BUFFER, MultiXactOffsetBuffer) +PG_LWLOCKTRANCHE(MULTIXACTMEMBER_BUFFER, MultiXactMemberBuffer) +PG_LWLOCKTRANCHE(NOTIFY_BUFFER, NotifyBuffer) +PG_LWLOCKTRANCHE(SERIAL_BUFFER, SerialBuffer) +PG_LWLOCKTRANCHE(WAL_INSERT, WALInsert) +PG_LWLOCKTRANCHE(BUFFER_CONTENT, BufferContent) +PG_LWLOCKTRANCHE(REPLICATION_ORIGIN_STATE, ReplicationOriginState) +PG_LWLOCKTRANCHE(REPLICATION_SLOT_IO, ReplicationSlotIO) +PG_LWLOCKTRANCHE(LOCK_FASTPATH, LockFastPath) +PG_LWLOCKTRANCHE(BUFFER_MAPPING, BufferMapping) +PG_LWLOCKTRANCHE(LOCK_MANAGER, LockManager) +PG_LWLOCKTRANCHE(PREDICATE_LOCK_MANAGER, PredicateLockManager) +PG_LWLOCKTRANCHE(PARALLEL_HASH_JOIN, ParallelHashJoin) +PG_LWLOCKTRANCHE(PARALLEL_BTREE_SCAN, ParallelBtreeScan) +PG_LWLOCKTRANCHE(PARALLEL_QUERY_DSA, ParallelQueryDSA) +PG_LWLOCKTRANCHE(PER_SESSION_DSA, PerSessionDSA) +PG_LWLOCKTRANCHE(PER_SESSION_RECORD_TYPE, PerSessionRecordType) +PG_LWLOCKTRANCHE(PER_SESSION_RECORD_TYPMOD, PerSessionRecordTypmod) +PG_LWLOCKTRANCHE(SHARED_TUPLESTORE, SharedTupleStore) +PG_LWLOCKTRANCHE(SHARED_TIDBITMAP, SharedTidBitmap) +PG_LWLOCKTRANCHE(PARALLEL_APPEND, ParallelAppend) +PG_LWLOCKTRANCHE(PER_XACT_PREDICATE_LIST, PerXactPredicateList) +PG_LWLOCKTRANCHE(PGSTATS_DSA, PgStatsDSA) +PG_LWLOCKTRANCHE(PGSTATS_HASH, PgStatsHash) +PG_LWLOCKTRANCHE(PGSTATS_DATA, PgStatsData) +PG_LWLOCKTRANCHE(LAUNCHER_DSA, LogicalRepLauncherDSA) +PG_LWLOCKTRANCHE(LAUNCHER_HASH, LogicalRepLauncherHash) +PG_LWLOCKTRANCHE(DSM_REGISTRY_DSA, DSMRegistryDSA) +PG_LWLOCKTRANCHE(DSM_REGISTRY_HASH, DSMRegistryHash) +PG_LWLOCKTRANCHE(COMMITTS_SLRU, CommitTsSLRU) +PG_LWLOCKTRANCHE(MULTIXACTOFFSET_SLRU, MultiXactOffsetSLRU) +PG_LWLOCKTRANCHE(MULTIXACTMEMBER_SLRU, MultiXactMemberSLRU) +PG_LWLOCKTRANCHE(NOTIFY_SLRU, NotifySLRU) +PG_LWLOCKTRANCHE(SERIAL_SLRU, SerialSLRU) +PG_LWLOCKTRANCHE(SUBTRANS_SLRU, SubtransSLRU) +PG_LWLOCKTRANCHE(XACT_SLRU, XactSLRU) +PG_LWLOCKTRANCHE(PARALLEL_VACUUM_DSA, ParallelVacuumDSA) +PG_LWLOCKTRANCHE(AIO_URING_COMPLETION, AioUringCompletion) From e6dfd068ed453b8690551dac700d57fbf32ba187 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 23 Jul 2025 15:44:29 -0400 Subject: [PATCH 03/67] Fix build breakage on Solaris-alikes with late-model GCC. Solaris has never bothered to add "const" to the second argument of PAM conversation procs, as all other Unixen did decades ago. This resulted in an "incompatible pointer" compiler warning when building --with-pam, but had no more serious effect than that, so we never did anything about it. However, as of GCC 14 the case is an error not warning by default. To complicate matters, recent OpenIndiana (and maybe illumos in general?) *does* supply the "const" by default, so we can't just assume that platforms using our solaris template need help. 
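For illustration, the two conversation-procedure signatures at issue look
roughly like this (a sketch of the conflicting declarations, not code from
this patch):

    /* traditional Solaris: no "const" on the message array */
    static int conv_proc(int num_msg, struct pam_message **msg,
                         struct pam_response **resp, void *appdata_ptr);

    /* Linux-PAM/OpenPAM, and recent OpenIndiana: "const" present */
    static int conv_proc(int num_msg, const struct pam_message **msg,
                         struct pam_response **resp, void *appdata_ptr);

Storing a function of one shape into a struct pam_conv that expects the
other is what GCC 14 now rejects by default.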
What we can do, short of building a configure-time probe, is to make solaris.h #define _PAM_LEGACY_NONCONST, which causes OpenIndiana's pam_appl.h to revert to the traditional definition, and hopefully will have no effect anywhere else. Then we can use that same symbol to control whether we include "const" in the declaration of pam_passwd_conv_proc(). Bug: #18995 Reported-by: Andrew Watkins Author: Tom Lane Discussion: https://postgr.es/m/18995-82058da9ab4337a7@postgresql.org Backpatch-through: 13 --- src/backend/libpq/auth.c | 12 ++++++++++-- src/include/port/solaris.h | 9 +++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c index 9f4d05ffbd453..4da46666439db 100644 --- a/src/backend/libpq/auth.c +++ b/src/backend/libpq/auth.c @@ -94,8 +94,16 @@ static int auth_peer(hbaPort *port); #define PGSQL_PAM_SERVICE "postgresql" /* Service name passed to PAM */ +/* Work around original Solaris' lack of "const" in the conv_proc signature */ +#ifdef _PAM_LEGACY_NONCONST +#define PG_PAM_CONST +#else +#define PG_PAM_CONST const +#endif + static int CheckPAMAuth(Port *port, const char *user, const char *password); -static int pam_passwd_conv_proc(int num_msg, const struct pam_message **msg, +static int pam_passwd_conv_proc(int num_msg, + PG_PAM_CONST struct pam_message **msg, struct pam_response **resp, void *appdata_ptr); static struct pam_conv pam_passw_conv = { @@ -1917,7 +1925,7 @@ auth_peer(hbaPort *port) */ static int -pam_passwd_conv_proc(int num_msg, const struct pam_message **msg, +pam_passwd_conv_proc(int num_msg, PG_PAM_CONST struct pam_message **msg, struct pam_response **resp, void *appdata_ptr) { const char *passwd; diff --git a/src/include/port/solaris.h b/src/include/port/solaris.h index e63a3bd824d6d..8ff40007c7f6a 100644 --- a/src/include/port/solaris.h +++ b/src/include/port/solaris.h @@ -24,3 +24,12 @@ #if defined(__i386__) #include #endif + +/* + * On original Solaris, PAM conversation procs lack a "const" in their + * declaration; but recent OpenIndiana versions put it there by default. + * The least messy way to deal with this is to define _PAM_LEGACY_NONCONST, + * which causes OpenIndiana to declare pam_conv per the Solaris tradition, + * and also use that symbol to control omitting the "const" in our own code. + */ +#define _PAM_LEGACY_NONCONST 1 From 086b9a33aafeeeaa0af0c806410ea6228f2f63f4 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Thu, 24 Jul 2025 11:43:20 +0900 Subject: [PATCH 04/67] doc: Add missing index entries and fix title formatting in pg_buffercache docs. This commit adds missing index entries for the functions pg_buffercache_numa() and pg_buffercache_usage_counts() in the pg_buffercache documentation. It also makes the function titles consistent by adding parentheses after function names where they were previously missing. 
Author: Fujii Masao Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/7d19af4b-7da3-4862-9f52-ff958960bd8d@oss.nttdata.com Backpatch-through: 18 --- doc/src/sgml/pgbuffercache.sgml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml index 546ace8369e28..eeb85a0e04908 100644 --- a/doc/src/sgml/pgbuffercache.sgml +++ b/doc/src/sgml/pgbuffercache.sgml @@ -19,10 +19,18 @@ pg_buffercache_pages + + pg_buffercache_numa + + pg_buffercache_summary + + pg_buffercache_usage_counts + + pg_buffercache_evict @@ -489,7 +497,7 @@ - The <structname>pg_buffercache_evict_relation</structname> Function + The <structname>pg_buffercache_evict_relation()</structname> Function The pg_buffercache_evict_relation() function is very similar to the pg_buffercache_evict() function. The @@ -507,7 +515,7 @@ - The <structname>pg_buffercache_evict_all</structname> Function + The <structname>pg_buffercache_evict_all()</structname> Function The pg_buffercache_evict_all() function is very similar to the pg_buffercache_evict() function. The From df335618ed87eecdef44a95e453e345a55a14ad8 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 24 Jul 2025 03:51:55 +0000 Subject: [PATCH 05/67] Fix cfbot failure caused by commit 228c370868. Ensure the test waits for the apply worker to exit after disabling the subscription. This is necessary to safely enable the retain_dead_tuples option. Also added a similar wait in another part of the test to prevent unintended apply worker activity that could lead to test failures post-subscription disable. Reported by Michael Paquier as per cfbot. Author: Zhijie Hou Discussion: https://postgr.es/m/aIGLgfRJIBwExoPj@paquier.xyz --- src/test/subscription/t/035_conflicts.pl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index 7458d7fba7e9a..976d53a870e5e 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -228,6 +228,11 @@ # Disable the subscription $node_A->psql('postgres', "ALTER SUBSCRIPTION $subname_AB DISABLE;"); +# Wait for the apply worker to stop +$node_A->poll_query_until('postgres', + "SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'" +); + # Enable retain_dead_tuples for disabled subscription ($cmdret, $stdout, $stderr) = $node_A->psql('postgres', "ALTER SUBSCRIPTION $subname_AB SET (retain_dead_tuples = true);"); @@ -278,6 +283,11 @@ # Disable the logical replication from node B to node A $node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB DISABLE"); +# Wait for the apply worker to stop +$node_A->poll_query_until('postgres', + "SELECT count(*) = 0 FROM pg_stat_activity WHERE backend_type = 'logical replication apply worker'" +); + $node_B->safe_psql('postgres', "UPDATE tab SET b = 3 WHERE a = 1;"); $node_A->safe_psql('postgres', "DELETE FROM tab WHERE a = 1;"); From 719dcf3c42260ceebfa2e8f6171a61161737a265 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 24 Jul 2025 15:41:18 +0900 Subject: [PATCH 06/67] Introduce field tracking cached plan type in PlannedStmt PlannedStmt gains a new field, called CachedPlanType, able to track if a given plan tree originates from the cache and if we are dealing with a generic or custom cached plan. 
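As a hypothetical sketch (not part of this commit), an extension could
tally generic-plan executions from an executor hook installed in its
_PG_init(), with prev_ExecutorEnd being the previously installed hook
saved at load time:

    static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
    static uint64 generic_plan_count = 0;

    static void
    my_ExecutorEnd(QueryDesc *queryDesc)
    {
        /* inspect the new field on the planned statement */
        if (queryDesc->plannedstmt->cached_plan_type == PLAN_CACHE_GENERIC)
            generic_plan_count++;

        if (prev_ExecutorEnd)
            prev_ExecutorEnd(queryDesc);
        else
            standard_ExecutorEnd(queryDesc);
    }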
This field is useful for monitoring or statistical purposes in executor
hooks, as sketched above, based on the planned statement attached to a
QueryDesc. A patch is under discussion for pg_stat_statements to provide
an equivalent of the counters in pg_prepared_statements for custom and
generic plans, giving a more global view of such data, which is currently
restricted to the current session.

The concept introduced in this commit is useful on its own, and has been
extracted from a larger patch by the same author.

Author: Sami Imseih
Reviewed-by: Andrei Lepikhov
Reviewed-by: Michael Paquier
Discussion: https://postgr.es/m/CAA5RZ0uFw8Y9GCFvafhC=OA8NnMqVZyzXPfv_EePOt+iv1T-qQ@mail.gmail.com
---
 src/backend/commands/foreigncmds.c   | 1 +
 src/backend/commands/schemacmds.c    | 1 +
 src/backend/executor/execParallel.c  | 1 +
 src/backend/optimizer/plan/planner.c | 1 +
 src/backend/tcop/postgres.c          | 1 +
 src/backend/tcop/utility.c           | 2 ++
 src/backend/utils/cache/plancache.c  | 8 ++++++++
 src/include/nodes/plannodes.h        | 17 +++++++++++++++++
 src/tools/pgindent/typedefs.list     | 1 +
 9 files changed, 33 insertions(+)

diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c
index 8d2d743154462..fcd5fcd8915e3 100644
--- a/src/backend/commands/foreigncmds.c
+++ b/src/backend/commands/foreigncmds.c
@@ -1588,6 +1588,7 @@ ImportForeignSchema(ImportForeignSchemaStmt *stmt)
 		pstmt->utilityStmt = (Node *) cstmt;
 		pstmt->stmt_location = rs->stmt_location;
 		pstmt->stmt_len = rs->stmt_len;
+		pstmt->cached_plan_type = PLAN_CACHE_NONE;

 		/* Execute statement */
 		ProcessUtility(pstmt, cmd, false,
diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c
index 546160f09410e..c00f1a11384f1 100644
--- a/src/backend/commands/schemacmds.c
+++ b/src/backend/commands/schemacmds.c
@@ -215,6 +215,7 @@ CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString,
 	wrapper->utilityStmt = stmt;
 	wrapper->stmt_location = stmt_location;
 	wrapper->stmt_len = stmt_len;
+	wrapper->cached_plan_type = PLAN_CACHE_NONE;

 	/* do this step */
 	ProcessUtility(wrapper,
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index f3e77bda27906..fc76f22fb8238 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -189,6 +189,7 @@ ExecSerializePlan(Plan *plan, EState *estate)
 	pstmt->permInfos = estate->es_rteperminfos;
 	pstmt->resultRelations = NIL;
 	pstmt->appendRelations = NIL;
+	pstmt->cached_plan_type = PLAN_CACHE_NONE;

 	/*
 	 * Transfer only parallel-safe subplans, leaving a NULL "hole" in the list
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index c989e72cac5cf..a77b2147e9592 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -582,6 +582,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
 	result->utilityStmt = parse->utilityStmt;
 	result->stmt_location = parse->stmt_location;
 	result->stmt_len = parse->stmt_len;
+	result->cached_plan_type = PLAN_CACHE_NONE;
 	result->jitFlags = PGJIT_NONE;
 	if (jit_enabled && jit_above_cost >= 0 &&
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2f8c3d5f91822..a297606cdd7fa 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -988,6 +988,7 @@ pg_plan_queries(List *querytrees, const char *query_string, int cursorOptions,
 			stmt->stmt_location = query->stmt_location;
 			stmt->stmt_len = query->stmt_len;
 			stmt->queryId = query->queryId;
+
stmt->cached_plan_type = PLAN_CACHE_NONE; } else { diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 4c1faf5575c4d..babc34d0cbe1d 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1234,6 +1234,7 @@ ProcessUtilitySlow(ParseState *pstate, wrapper->utilityStmt = stmt; wrapper->stmt_location = pstmt->stmt_location; wrapper->stmt_len = pstmt->stmt_len; + wrapper->cached_plan_type = PLAN_CACHE_NONE; ProcessUtility(wrapper, queryString, @@ -1964,6 +1965,7 @@ ProcessUtilityForAlterTable(Node *stmt, AlterTableUtilityContext *context) wrapper->utilityStmt = stmt; wrapper->stmt_location = context->pstmt->stmt_location; wrapper->stmt_len = context->pstmt->stmt_len; + wrapper->cached_plan_type = PLAN_CACHE_NONE; ProcessUtility(wrapper, context->queryString, diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 89a1c79e984d1..f4d2b9458a5ea 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1283,6 +1283,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, CachedPlan *plan = NULL; List *qlist; bool customplan; + ListCell *lc; /* Assert caller is doing things in a sane order */ Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); @@ -1385,6 +1386,13 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, plan->is_saved = true; } + foreach(lc, plan->stmt_list) + { + PlannedStmt *pstmt = (PlannedStmt *) lfirst(lc); + + pstmt->cached_plan_type = customplan ? PLAN_CACHE_CUSTOM : PLAN_CACHE_GENERIC; + } + return plan; } diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 4f59e30d62d5e..46e2e09ea35be 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -28,6 +28,20 @@ * ---------------------------------------------------------------- */ +/* ---------------- + * CachedPlanType + * + * CachedPlanType identifies whether a PlannedStmt is a cached plan, and if + * so, whether it is generic or custom. + * ---------------- + */ +typedef enum CachedPlanType +{ + PLAN_CACHE_NONE = 0, /* Not a cached plan */ + PLAN_CACHE_GENERIC, /* Generic cached plan */ + PLAN_CACHE_CUSTOM, /* Custom cached plan */ +} CachedPlanType; + /* ---------------- * PlannedStmt node * @@ -58,6 +72,9 @@ typedef struct PlannedStmt /* plan identifier (can be set by plugins) */ int64 planId; + /* type of cached plan */ + CachedPlanType cached_plan_type; + /* is it insert|update|delete|merge RETURNING? */ bool hasReturning; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a8656419cb608..4353befab9934 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -391,6 +391,7 @@ CachedFunctionHashEntry CachedFunctionHashKey CachedPlan CachedPlanSource +CachedPlanType CallContext CallStmt CancelRequestPacket From e1c3654839e464957675344a1e949489d98b103b Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 24 Jul 2025 09:05:32 +0000 Subject: [PATCH 07/67] Fix duplicate transaction replay during pg_createsubscriber. Previously, the tool could replay the same transaction twice, once during recovery, then again during replication after the subscriber was set up. This occurred because the same recovery_target_lsn was used both to finalize recovery and to start replication. If recovery_target_inclusive = true, the transaction at that LSN would be applied during recovery and then sent again by the publisher leading to duplication. 
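To make the interaction concrete, the recovery settings generated by the
tool previously amounted to something like this (the LSN value is made up
for illustration):

    recovery_target_lsn = '0/1234560'    # also used as the replication start point
    recovery_target_inclusive = true     # replays the commit at that LSN
    recovery_target_action = 'promote'

so a transaction committing exactly at the target LSN was applied once
during recovery and then sent again once the subscription started
streaming from that same LSN.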
To prevent this, we now set recovery_target_inclusive = false. This ensures the transaction at recovery_target_lsn is not reapplied during recovery, avoiding duplication when replication begins. Bug #18897 Reported-by: Zane Duffield Author: Shlok Kyal Reviewed-by: vignesh C Reviewed-by: Amit Kapila Backpatch-through: 17, where it was introduced Discussion: https://postgr.es/m/18897-d3db67535860dddb@postgresql.org --- src/bin/pg_basebackup/pg_createsubscriber.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c index 025b893a41e83..3986882f04292 100644 --- a/src/bin/pg_basebackup/pg_createsubscriber.c +++ b/src/bin/pg_basebackup/pg_createsubscriber.c @@ -1250,8 +1250,17 @@ setup_recovery(const struct LogicalRepInfo *dbinfo, const char *datadir, const c appendPQExpBufferStr(recoveryconfcontents, "recovery_target = ''\n"); appendPQExpBufferStr(recoveryconfcontents, "recovery_target_timeline = 'latest'\n"); + + /* + * Set recovery_target_inclusive = false to avoid reapplying the + * transaction committed at 'lsn' after subscription is enabled. This is + * because the provided 'lsn' is also used as the replication start point + * for the subscription. So, the server can send the transaction committed + * at that 'lsn' after replication is started which can lead to applying + * the same transaction twice if we keep recovery_target_inclusive = true. + */ appendPQExpBufferStr(recoveryconfcontents, - "recovery_target_inclusive = true\n"); + "recovery_target_inclusive = false\n"); appendPQExpBufferStr(recoveryconfcontents, "recovery_target_action = promote\n"); appendPQExpBufferStr(recoveryconfcontents, "recovery_target_name = ''\n"); From 15d33eb1924c1093102b8ce142ede4cb3912e85e Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 24 Jul 2025 10:13:45 -0500 Subject: [PATCH 08/67] Fix return value of visibilitymap_get_status(). This function is declared as returning a uint8, but it returns a bool in one code path. To fix, return (uint8) 0 instead of false there. This should behave exactly the same as before, but it might prevent future compiler complaints. Oversight in commit a892234f83. Author: Julien Rouhaud Discussion: https://postgr.es/m/aIHluT2isN58jqHV%40jrouhaud --- src/backend/access/heap/visibilitymap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 745a04ef26e29..8f918e00af7ed 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -364,7 +364,7 @@ visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf) { *vmbuf = vm_readbuf(rel, mapBlock, false); if (!BufferIsValid(*vmbuf)) - return false; + return (uint8) 0; } map = PageGetContents(BufferGetPage(*vmbuf)); From ac000fca743eff923d1feb4bc722d905901ae540 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 25 Jul 2025 11:17:48 +0900 Subject: [PATCH 09/67] Lower bounds related to pgstats kinds This commit changes stats kinds to have the following bounds, making their handling in core cheaper by default: - PGSTAT_KIND_CUSTOM_MIN 128 -> 24 - PGSTAT_KIND_MAX 256 -> 32 The original numbers were rather high, and showed an impact on performance in pgstat_report_stat() for the case of simple queries with its early-exit path if there are no pending statistics to flush. 
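For extension authors nothing changes in how a custom stats kind is
registered; only the range of valid IDs moves. A sketch (the kind ID is
hypothetical, and my_kind_info stands for a PgStat_KindInfo filled in as
in injection_stats.c below):

    /* Custom kind IDs must now fall between PGSTAT_KIND_CUSTOM_MIN (24)
     * and PGSTAT_KIND_CUSTOM_MAX (32). */
    #define PGSTAT_KIND_MY_EXT 25

    void
    _PG_init(void)
    {
        pgstat_register_kind(PGSTAT_KIND_MY_EXT, &my_kind_info);
    }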
This early-exit logic will be improved further in a follow-up commit to
bring the performance of pgstat_report_stat() on par with v17 and older
versions. Lowering the bounds is a change worth doing on its own,
independently of the other improvement.

These new numbers should be enough to leave some room for the following
years for built-in and custom stats kinds, with stable ID numbers. At
least that should be enough to start with this facility for extension
developers. The bounds can always be increased in the tree later,
depending on future requirements.

Per discussion with Andres Freund and Bertrand Drouvot.

Discussion: https://postgr.es/m/eb224uegsga2hgq7dfq3ps5cduhpqej7ir2hjxzzozjthrekx5@dysei6buqthe
Backpatch-through: 18
---
 src/include/utils/pgstat_kind.h                            | 6 +++---
 src/test/modules/injection_points/injection_stats.c        | 2 +-
 src/test/modules/injection_points/injection_stats_fixed.c  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/include/utils/pgstat_kind.h b/src/include/utils/pgstat_kind.h
index f44169fd5a3c7..eb5f0b3ae6db7 100644
--- a/src/include/utils/pgstat_kind.h
+++ b/src/include/utils/pgstat_kind.h
@@ -18,7 +18,7 @@

 /* Range of IDs allowed, for built-in and custom kinds */
 #define PGSTAT_KIND_MIN 1 /* Minimum ID allowed */
-#define PGSTAT_KIND_MAX 256 /* Maximum ID allowed */
+#define PGSTAT_KIND_MAX 32 /* Maximum ID allowed */

 /* use 0 for INVALID, to catch zero-initialized data */
 #define PGSTAT_KIND_INVALID 0
@@ -46,7 +46,7 @@
 /* Custom stats kinds */

 /* Range of IDs allowed for custom stats kinds */
-#define PGSTAT_KIND_CUSTOM_MIN 128
+#define PGSTAT_KIND_CUSTOM_MIN 24
 #define PGSTAT_KIND_CUSTOM_MAX PGSTAT_KIND_MAX
 #define PGSTAT_KIND_CUSTOM_SIZE (PGSTAT_KIND_CUSTOM_MAX - PGSTAT_KIND_CUSTOM_MIN + 1)

@@ -55,7 +55,7 @@
  * development and have not reserved their own unique kind ID yet. See:
  * https://wiki.postgresql.org/wiki/CustomCumulativeStats
  */
-#define PGSTAT_KIND_EXPERIMENTAL 128
+#define PGSTAT_KIND_EXPERIMENTAL 24

 static inline bool
 pgstat_is_kind_builtin(PgStat_Kind kind)
diff --git a/src/test/modules/injection_points/injection_stats.c b/src/test/modules/injection_points/injection_stats.c
index 14903c629e0d1..e3947b23ba573 100644
--- a/src/test/modules/injection_points/injection_stats.c
+++ b/src/test/modules/injection_points/injection_stats.c
@@ -59,7 +59,7 @@ static const PgStat_KindInfo injection_stats = {
 /*
  * Kind ID reserved for statistics of injection points.
  */
-#define PGSTAT_KIND_INJECTION 129
+#define PGSTAT_KIND_INJECTION 25

 /* Track if stats are loaded */
 static bool inj_stats_loaded = false;
diff --git a/src/test/modules/injection_points/injection_stats_fixed.c b/src/test/modules/injection_points/injection_stats_fixed.c
index 3d0c01bdd05ab..bc54c79d190b9 100644
--- a/src/test/modules/injection_points/injection_stats_fixed.c
+++ b/src/test/modules/injection_points/injection_stats_fixed.c
@@ -64,7 +64,7 @@ static const PgStat_KindInfo injection_stats_fixed = {
 /*
  * Kind ID reserved for statistics of injection points.
*/ -#define PGSTAT_KIND_INJECTION_FIXED 130 +#define PGSTAT_KIND_INJECTION_FIXED 26 /* Track if fixed-numbered stats are loaded */ static bool inj_fixed_loaded = false; From 641f20d4c433b66df2928408fb2b44bd165c2329 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 25 Jul 2025 16:17:13 +0900 Subject: [PATCH 10/67] Fix assertion failure with latch wait in single-user mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LatchWaitSetPostmasterDeathPos, the latch event position for the postmaster death event, is initialized under IsUnderPostmaster. WaitLatch() considered it as a valid wait target in single-user mode (!IsUnderPostmaster), which was incorrect. One code path found to fail with an assertion failure is a database drop in single-user mode while waiting in WaitForProcSignalBarrier() after the drop. Oversight in commit 84e5b2f07a5e. Author: Patrick Stählin Co-authored-by: Ronan Dunklau Discussion: https://postgr.es/m/18996-3a2744c8140488de@postgresql.org Backpatch-through: 18 --- src/backend/storage/ipc/latch.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c index c6aefd2f688dd..beadeb5e46afa 100644 --- a/src/backend/storage/ipc/latch.c +++ b/src/backend/storage/ipc/latch.c @@ -187,9 +187,11 @@ WaitLatch(Latch *latch, int wakeEvents, long timeout, if (!(wakeEvents & WL_LATCH_SET)) latch = NULL; ModifyWaitEvent(LatchWaitSet, LatchWaitSetLatchPos, WL_LATCH_SET, latch); - ModifyWaitEvent(LatchWaitSet, LatchWaitSetPostmasterDeathPos, - (wakeEvents & (WL_EXIT_ON_PM_DEATH | WL_POSTMASTER_DEATH)), - NULL); + + if (IsUnderPostmaster) + ModifyWaitEvent(LatchWaitSet, LatchWaitSetPostmasterDeathPos, + (wakeEvents & (WL_EXIT_ON_PM_DEATH | WL_POSTMASTER_DEATH)), + NULL); if (WaitEventSetWait(LatchWaitSet, (wakeEvents & WL_TIMEOUT) ? timeout : -1, From b5d084c5353f29e2e217dfa86f327e14d02998c1 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Fri, 25 Jul 2025 18:38:36 +0900 Subject: [PATCH 11/67] Fix background worker not restarting after crash-and-restart cycle. Previously, if a background worker crashed (e.g., due to a SIGKILL) and the server restarted due to restart_after_crash being enabled, the worker was not restarted as expected. Background workers without the never-restart flag should automatically restart in this case. This issue was introduced in commit 28a520c0b77, which failed to reset the rw_pid field in the RegisteredBgWorker struct for the crashed worker. This commit fixes the problem by resetting rw_pid for all eligible background workers during the crash-and-restart cycle. Back-patched to v18, where the bug was introduced. Bug fix patches were proposed by Andrey Rudometov and ChangAo Chen, but this commit uses a different approach. 
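For context, a worker registered along these lines (a generic sketch, not
code from this patch) is expected to come back after a crash-and-restart
cycle:

    BackgroundWorker worker = {0};

    worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
    worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
    worker.bgw_restart_time = 5;    /* seconds; BGW_NEVER_RESTART opts out */
    snprintf(worker.bgw_library_name, BGW_MAXLEN, "my_extension");
    snprintf(worker.bgw_function_name, BGW_MAXLEN, "my_extension_main");
    RegisterBackgroundWorker(&worker);

Before the fix, such a worker could stay down after the crash-and-restart
cycle because its stale rw_pid was never cleared.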
Reported-by: Andrey Rudometov
Reported-by: ChangAo Chen
Author: Andrey Rudometov
Author: ChangAo Chen
Co-authored-by: Fujii Masao
Reviewed-by: ChangAo Chen
Reviewed-by: Shveta Malik
Discussion: https://postgr.es/m/CAF6JsWiO=i24qYitWe6ns1sXqcL86rYxdyU+pNYk-WueKPSySg@mail.gmail.com
Discussion: https://postgr.es/m/tencent_E00A056B3953EE6440F0F40F80EC30427D09@qq.com
Backpatch-through: 18
---
 src/backend/postmaster/bgworker.c   | 1 +
 src/backend/postmaster/postmaster.c | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 116ddf7b835f1..1ad65c237c34e 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -613,6 +613,7 @@ ResetBackgroundWorkerCrashTimes(void)
 			 * resetting.
 			 */
 			rw->rw_crashed_at = 0;
+			rw->rw_pid = 0;

 			/*
 			 * If there was anyone waiting for it, they're history.
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index cca9b946e5384..e01d9f0cfe81e 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2630,6 +2630,13 @@ CleanupBackend(PMChild *bp,
 	}
 	bp = NULL;

+	/*
+	 * In a crash case, exit immediately without resetting background worker
+	 * state. However, if restart_after_crash is enabled, the background
+	 * worker state (e.g., rw_pid) still needs to be reset so the worker can
+	 * restart after crash recovery. This reset is handled in
+	 * ResetBackgroundWorkerCrashTimes(), not here.
+	 */
 	if (crashed)
 	{
 		HandleChildCrash(bp_pid, exitstatus, procname);

From 1dfe3ef3f960d6924eb1f18facf4fbdae6e1cc1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Herrera?=
Date: Fri, 25 Jul 2025 12:03:19 +0200
Subject: [PATCH 12/67] Refactor grammar to create opt_utility_option_list

This changes the grammar for REINDEX, CHECKPOINT, CLUSTER,
ANALYZE/ANALYSE; they still accept the same options as before, but the
grammar is written differently for convenience of future development.
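For example, all of the following keep parsing exactly as before (table
and index names are placeholders; the options shown are pre-existing
ones):

    REINDEX (VERBOSE) TABLE mytab;
    CLUSTER (VERBOSE) mytab USING myidx;
    ANALYZE (VERBOSE, SKIP_LOCKED) mytab;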
Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/202507231538.ir7pjzoow6oe@alvherre.pgsql --- src/backend/parser/gram.y | 124 +++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 68 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 73345bb3c7045..db43034b9db57 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -318,6 +318,11 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type opt_qualified_name %type opt_concurrently %type opt_drop_behavior +%type opt_utility_option_list +%type utility_option_list +%type utility_option_elem +%type utility_option_name +%type utility_option_arg %type alter_column_default opclass_item opclass_drop alter_using %type add_drop opt_asc_desc opt_nulls_order @@ -338,10 +343,6 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); create_extension_opt_item alter_extension_opt_item %type opt_lock lock_type cast_context -%type utility_option_name -%type utility_option_elem -%type utility_option_list -%type utility_option_arg %type drop_option %type opt_or_replace opt_no opt_grant_grant_option @@ -556,7 +557,6 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type generic_option_list alter_generic_option_list %type reindex_target_relation reindex_target_all -%type opt_reindex_option_list %type copy_generic_opt_arg copy_generic_opt_arg_list_item %type copy_generic_opt_elem @@ -1141,6 +1141,41 @@ opt_drop_behavior: | /* EMPTY */ { $$ = DROP_RESTRICT; /* default */ } ; +opt_utility_option_list: + '(' utility_option_list ')' { $$ = $2; } + | /* EMPTY */ { $$ = NULL; } + ; + +utility_option_list: + utility_option_elem + { + $$ = list_make1($1); + } + | utility_option_list ',' utility_option_elem + { + $$ = lappend($1, $3); + } + ; + +utility_option_elem: + utility_option_name utility_option_arg + { + $$ = makeDefElem($1, $2, @1); + } + ; + +utility_option_name: + NonReservedWord { $$ = $1; } + | analyze_keyword { $$ = "analyze"; } + | FORMAT_LA { $$ = "format"; } + ; + +utility_option_arg: + opt_boolean_or_string { $$ = (Node *) makeString($1); } + | NumericOnly { $$ = (Node *) $1; } + | /* EMPTY */ { $$ = NULL; } + ; + /***************************************************************************** * * CALL statement @@ -2028,18 +2063,12 @@ constraints_set_mode: * Checkpoint statement */ CheckPointStmt: - CHECKPOINT + CHECKPOINT opt_utility_option_list { CheckPointStmt *n = makeNode(CheckPointStmt); $$ = (Node *) n; - } - | CHECKPOINT '(' utility_option_list ')' - { - CheckPointStmt *n = makeNode(CheckPointStmt); - - $$ = (Node *) n; - n->options = $3; + n->options = $2; } ; @@ -9354,7 +9383,7 @@ DropTransformStmt: DROP TRANSFORM opt_if_exists FOR Typename LANGUAGE name opt_d *****************************************************************************/ ReindexStmt: - REINDEX opt_reindex_option_list reindex_target_relation opt_concurrently qualified_name + REINDEX opt_utility_option_list reindex_target_relation opt_concurrently qualified_name { ReindexStmt *n = makeNode(ReindexStmt); @@ -9367,7 +9396,7 @@ ReindexStmt: makeDefElem("concurrently", NULL, @4)); $$ = (Node *) n; } - | REINDEX opt_reindex_option_list SCHEMA opt_concurrently name + | REINDEX opt_utility_option_list SCHEMA opt_concurrently name { ReindexStmt *n = makeNode(ReindexStmt); @@ -9380,7 +9409,7 @@ ReindexStmt: makeDefElem("concurrently", NULL, @4)); $$ = (Node *) n; } - | REINDEX opt_reindex_option_list 
reindex_target_all opt_concurrently opt_single_name + | REINDEX opt_utility_option_list reindex_target_all opt_concurrently opt_single_name { ReindexStmt *n = makeNode(ReindexStmt); @@ -9402,10 +9431,6 @@ reindex_target_all: SYSTEM_P { $$ = REINDEX_OBJECT_SYSTEM; } | DATABASE { $$ = REINDEX_OBJECT_DATABASE; } ; -opt_reindex_option_list: - '(' utility_option_list ')' { $$ = $2; } - | /* EMPTY */ { $$ = NULL; } - ; /***************************************************************************** * @@ -11903,13 +11928,13 @@ ClusterStmt: n->params = $3; $$ = (Node *) n; } - | CLUSTER '(' utility_option_list ')' + | CLUSTER opt_utility_option_list { ClusterStmt *n = makeNode(ClusterStmt); n->relation = NULL; n->indexname = NULL; - n->params = $3; + n->params = $2; $$ = (Node *) n; } /* unparenthesized VERBOSE kept for pre-14 compatibility */ @@ -11919,21 +11944,18 @@ ClusterStmt: n->relation = $3; n->indexname = $4; - n->params = NIL; if ($2) - n->params = lappend(n->params, makeDefElem("verbose", NULL, @2)); + n->params = list_make1(makeDefElem("verbose", NULL, @2)); $$ = (Node *) n; } /* unparenthesized VERBOSE kept for pre-17 compatibility */ - | CLUSTER opt_verbose + | CLUSTER VERBOSE { ClusterStmt *n = makeNode(ClusterStmt); n->relation = NULL; n->indexname = NULL; - n->params = NIL; - if ($2) - n->params = lappend(n->params, makeDefElem("verbose", NULL, @2)); + n->params = list_make1(makeDefElem("verbose", NULL, @2)); $$ = (Node *) n; } /* kept for pre-8.3 compatibility */ @@ -11943,9 +11965,8 @@ ClusterStmt: n->relation = $5; n->indexname = $3; - n->params = NIL; if ($2) - n->params = lappend(n->params, makeDefElem("verbose", NULL, @2)); + n->params = list_make1(makeDefElem("verbose", NULL, @2)); $$ = (Node *) n; } ; @@ -11996,64 +12017,31 @@ VacuumStmt: VACUUM opt_full opt_freeze opt_verbose opt_analyze opt_vacuum_relati } ; -AnalyzeStmt: analyze_keyword opt_verbose opt_vacuum_relation_list +AnalyzeStmt: analyze_keyword opt_utility_option_list opt_vacuum_relation_list { VacuumStmt *n = makeNode(VacuumStmt); - n->options = NIL; - if ($2) - n->options = lappend(n->options, - makeDefElem("verbose", NULL, @2)); + n->options = $2; n->rels = $3; n->is_vacuumcmd = false; $$ = (Node *) n; } - | analyze_keyword '(' utility_option_list ')' opt_vacuum_relation_list + | analyze_keyword VERBOSE opt_vacuum_relation_list { VacuumStmt *n = makeNode(VacuumStmt); - n->options = $3; - n->rels = $5; + n->options = list_make1(makeDefElem("verbose", NULL, @2)); + n->rels = $3; n->is_vacuumcmd = false; $$ = (Node *) n; } ; -utility_option_list: - utility_option_elem - { - $$ = list_make1($1); - } - | utility_option_list ',' utility_option_elem - { - $$ = lappend($1, $3); - } - ; - analyze_keyword: ANALYZE | ANALYSE /* British */ ; -utility_option_elem: - utility_option_name utility_option_arg - { - $$ = makeDefElem($1, $2, @1); - } - ; - -utility_option_name: - NonReservedWord { $$ = $1; } - | analyze_keyword { $$ = "analyze"; } - | FORMAT_LA { $$ = "format"; } - ; - -utility_option_arg: - opt_boolean_or_string { $$ = (Node *) makeString($1); } - | NumericOnly { $$ = (Node *) $1; } - | /* EMPTY */ { $$ = NULL; } - ; - opt_analyze: analyze_keyword { $$ = true; } | /*EMPTY*/ { $$ = false; } From 5457ea46d181f8b8dbe1ae482720b23bff4029de Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 10:56:55 -0400 Subject: [PATCH 13/67] Fix dynahash's HASH_FIXED_SIZE ("isfixed") option. 
This flag was effectively a no-op in EXEC_BACKEND (ie, Windows) builds, because it was kept in the process-local HTAB struct, and it could only ever become set in the postmaster's copy. The simplest fix is to move it to the shared HASHHDR struct. We could keep a copy in HTAB as well, as we do with keysize and some other fields, but the "too much contention" argument doesn't seem to apply here: we only examine isfixed during element_alloc(), which had better not get hit very often for a shared hashtable. This oversight dates to 7c797e719 which invented the option. But back-patching doesn't seem appropriate given the lack of field complaints. If there is anyone running an affected workload on Windows, they might be unhappy about the behavior changing in a minor release. Author: Aidar Imamov Reviewed-by: Tom Lane Discussion: https://postgr.es/m/4d0cb35ff01c5c74d2b9a582ecb73823@postgrespro.ru --- src/backend/utils/hash/dynahash.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 1ad155d446e51..42e9be274fc6a 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -195,6 +195,7 @@ struct HASHHDR long ssize; /* segment size --- must be power of 2 */ int sshift; /* segment shift = log2(ssize) */ int nelem_alloc; /* number of entries to allocate at once */ + bool isfixed; /* if true, don't enlarge */ #ifdef HASH_STATISTICS @@ -227,7 +228,6 @@ struct HTAB MemoryContext hcxt; /* memory context if default allocator used */ char *tabname; /* table name (for error messages) */ bool isshared; /* true if table is in shared memory */ - bool isfixed; /* if true, don't enlarge */ /* freezing a shared table isn't allowed, so we can keep state here */ bool frozen; /* true = no more inserts allowed */ @@ -618,8 +618,10 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) } } + /* Set isfixed if requested, but not till after we build initial entries */ if (flags & HASH_FIXED_SIZE) - hashp->isfixed = true; + hctl->isfixed = true; + return hashp; } @@ -644,6 +646,8 @@ hdefault(HTAB *hashp) hctl->ssize = DEF_SEGSIZE; hctl->sshift = DEF_SEGSIZE_SHIFT; + hctl->isfixed = false; /* can be enlarged */ + #ifdef HASH_STATISTICS hctl->accesses = hctl->collisions = 0; #endif @@ -1713,7 +1717,7 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) HASHELEMENT *prevElement; int i; - if (hashp->isfixed) + if (hctl->isfixed) return false; /* Each element has a HASHELEMENT header plus user data. */ From 7d8f5957792421ec3bb9d1b9b6ca25d689d974b7 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:30:00 -0400 Subject: [PATCH 14/67] Create infrastructure to reliably prevent leakage of PGresults. Commit 232d8caea fixed a case where postgres_fdw could lose track of a PGresult object, resulting in a process-lifespan memory leak. But I have little faith that there aren't other potential PGresult leakages, now or in future, in the backend modules that use libpq. Therefore, this patch proposes infrastructure that makes all PGresults returned from libpq act as though they are palloc'd in the CurrentMemoryContext (with the option to relocate them to another context later). This should greatly reduce the risk of careless leaks, and it also permits removal of a bunch of code that attempted to prevent such leaks via PG_TRY blocks. 
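Under the new scheme, a typical usage pattern becomes leak-proof without
any PG_TRY (a sketch; the connection setup is assumed):

    PGresult   *res = PQexec(conn, "SELECT 1");    /* transparently wrapped */

    if (PQresultStatus(res) != PGRES_TUPLES_OK)
        ereport(ERROR,
                (errmsg("remote query failed: %s",
                        PQresultErrorMessage(res))));
    /* on ERROR, the wrapper's context reset callback PQclears the result */

    /* ... use res ... */
    PQclear(res);    /* explicit free still works when done early */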
This patch adds infrastructure that wraps each PGresult in a "libpqsrv_PGresult" that provides a memory context reset callback to PQclear the PGresult. Code using this abstraction is inherently memory-safe to the same extent as we are accustomed to in most backend code. Furthermore, we add some macros that automatically redirect calls of the libpq functions concerned with PGresults to use this infrastructure, so that almost no source-code changes are needed to wheel this infrastructure into place in all the backend code that uses libpq. Perhaps in future we could create similar infrastructure for PGconn objects, but there seems less need for that. This patch just creates the infrastructure and makes relevant code use it, including reverting 232d8caea in favor of this mechanism. A good deal of follow-on simplification is possible now that we don't have to be so cautious about freeing PGresults, but I'll put that in a separate patch. Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/postgres_fdw/postgres_fdw.c | 36 +--- contrib/postgres_fdw/postgres_fdw.h | 2 +- src/backend/utils/mmgr/mcxt.c | 39 +++- src/include/libpq/libpq-be-fe-helpers.h | 27 +-- src/include/libpq/libpq-be-fe.h | 259 ++++++++++++++++++++++++ src/include/utils/palloc.h | 2 + src/tools/pgindent/typedefs.list | 1 + 7 files changed, 322 insertions(+), 44 deletions(-) create mode 100644 src/include/libpq/libpq-be-fe.h diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index e0a34b27c7cfd..3a84d06cfd1f7 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -240,7 +240,6 @@ typedef struct PgFdwDirectModifyState PGresult *result; /* result for query */ int num_tuples; /* # of result tuples */ int next_tuple; /* index of next one to return */ - MemoryContextCallback result_cb; /* ensures result will get freed */ Relation resultRel; /* relcache entry for the target relation */ AttrNumber *attnoMap; /* array of attnums of input user columns */ AttrNumber ctidAttno; /* attnum of input ctid column */ @@ -2671,17 +2670,6 @@ postgresBeginDirectModify(ForeignScanState *node, int eflags) dmstate = (PgFdwDirectModifyState *) palloc0(sizeof(PgFdwDirectModifyState)); node->fdw_state = dmstate; - /* - * We use a memory context callback to ensure that the dmstate's PGresult - * (if any) will be released, even if the query fails somewhere that's - * outside our control. The callback is always armed for the duration of - * the query; this relies on PQclear(NULL) being a no-op. - */ - dmstate->result_cb.func = (MemoryContextCallbackFunction) PQclear; - dmstate->result_cb.arg = NULL; - MemoryContextRegisterResetCallback(CurrentMemoryContext, - &dmstate->result_cb); - /* * Identify which user to do the remote access as. This should match what * ExecCheckPermissions() does. @@ -2829,13 +2817,7 @@ postgresEndDirectModify(ForeignScanState *node) return; /* Release PGresult */ - if (dmstate->result) - { - PQclear(dmstate->result); - dmstate->result = NULL; - /* ... and don't forget to disable the callback */ - dmstate->result_cb.arg = NULL; - } + PQclear(dmstate->result); /* Release remote connection */ ReleaseConnection(dmstate->conn); @@ -4615,20 +4597,20 @@ execute_dml_stmt(ForeignScanState *node) /* * Get the result, and check for success. - * - * We use a memory context callback to ensure that the PGresult will be - * released, even if the query fails somewhere that's outside our control. 
- * The callback is already registered, just need to fill in its arg. */ - Assert(dmstate->result == NULL); dmstate->result = pgfdw_get_result(dmstate->conn); - dmstate->result_cb.arg = dmstate->result; - if (PQresultStatus(dmstate->result) != (dmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, false, + pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, true, dmstate->query); + /* + * The result potentially needs to survive across multiple executor row + * cycles, so move it to the context where the dmstate is. + */ + dmstate->result = libpqsrv_PGresultSetParent(dmstate->result, + GetMemoryChunkContext(dmstate)); + /* Get the number of rows affected. */ if (dmstate->has_returning) dmstate->num_tuples = PQntuples(dmstate->result); diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 81358f3bde7df..9cb4ee84139ea 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -15,7 +15,7 @@ #include "foreign/foreign.h" #include "lib/stringinfo.h" -#include "libpq-fe.h" +#include "libpq/libpq-be-fe.h" #include "nodes/execnodes.h" #include "nodes/pathnodes.h" #include "utils/relcache.h" diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 15fa4d0a55eeb..ce01dce9861da 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -560,9 +560,7 @@ MemoryContextDeleteChildren(MemoryContext context) * the specified context, since that means it will automatically be freed * when no longer needed. * - * There is no API for deregistering a callback once registered. If you - * want it to not do anything anymore, adjust the state pointed to by its - * "arg" to indicate that. + * Note that callers can assume this cannot fail. */ void MemoryContextRegisterResetCallback(MemoryContext context, @@ -577,6 +575,41 @@ MemoryContextRegisterResetCallback(MemoryContext context, context->isReset = false; } +/* + * MemoryContextUnregisterResetCallback + * Undo the effects of MemoryContextRegisterResetCallback. + * + * This can be used if a callback's effects are no longer required + * at some point before the context has been reset/deleted. It is the + * caller's responsibility to pfree the callback struct (if needed). + * + * An assertion failure occurs if the callback was not registered. + * We could alternatively define that case as a no-op, but that seems too + * likely to mask programming errors such as passing the wrong context. + */ +void +MemoryContextUnregisterResetCallback(MemoryContext context, + MemoryContextCallback *cb) +{ + MemoryContextCallback *prev, + *cur; + + Assert(MemoryContextIsValid(context)); + + for (prev = NULL, cur = context->reset_cbs; cur != NULL; + prev = cur, cur = cur->next) + { + if (cur != cb) + continue; + if (prev) + prev->next = cur->next; + else + context->reset_cbs = cur->next; + return; + } + Assert(false); +} + /* * MemoryContextCallResetCallbacks * Internal function to call all registered callbacks for context. diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index af13bd6bf3da3..8d12a331497f8 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -30,17 +30,7 @@ #ifndef LIBPQ_BE_FE_HELPERS_H #define LIBPQ_BE_FE_HELPERS_H -/* - * Despite the name, BUILDING_DLL is set only when building code directly part - * of the backend. Which also is where libpq isn't allowed to be - * used. 
Obviously this doesn't protect against libpq-fe.h getting included - * otherwise, but perhaps still protects against a few mistakes... - */ -#ifdef BUILDING_DLL -#error "libpq may not be used code directly built into the backend" -#endif - -#include "libpq-fe.h" +#include "libpq/libpq-be-fe.h" #include "miscadmin.h" #include "storage/fd.h" #include "storage/latch.h" @@ -462,13 +452,21 @@ exit: ; * This function is intended to be set via PQsetNoticeReceiver() so that * NOTICE, WARNING, and similar messages from the connection are reported via * ereport(), instead of being printed to stderr. + * + * Because this will be called from libpq with a "real" (not wrapped) + * PGresult, we need to temporarily ignore libpq-be-fe.h's wrapper macros + * for PGresult and also PQresultErrorMessage, and put back the wrappers + * afterwards. That's not pretty, but there seems no better alternative. */ +#undef PGresult +#undef PQresultErrorMessage + static inline void libpqsrv_notice_receiver(void *arg, const PGresult *res) { - char *message; + const char *message; int len; - char *prefix = (char *) arg; + const char *prefix = (const char *) arg; /* * Trim the trailing newline from the message text returned from @@ -484,4 +482,7 @@ libpqsrv_notice_receiver(void *arg, const PGresult *res) errmsg_internal("%s: %.*s", prefix, len, message)); } +#define PGresult libpqsrv_PGresult +#define PQresultErrorMessage libpqsrv_PQresultErrorMessage + #endif /* LIBPQ_BE_FE_HELPERS_H */ diff --git a/src/include/libpq/libpq-be-fe.h b/src/include/libpq/libpq-be-fe.h new file mode 100644 index 0000000000000..e3f796b023092 --- /dev/null +++ b/src/include/libpq/libpq-be-fe.h @@ -0,0 +1,259 @@ +/*------------------------------------------------------------------------- + * + * libpq-be-fe.h + * Wrapper functions for using libpq in extensions + * + * Code built directly into the backend is not allowed to link to libpq + * directly. Extension code is allowed to use libpq however. One of the + * main risks in doing so is leaking the malloc-allocated structures + * returned by libpq, causing a process-lifespan memory leak. + * + * This file provides wrapper objects to help in building memory-safe code. + * A PGresult object wrapped this way acts much as if it were palloc'd: + * it will go away when the specified context is reset or deleted. + * We might later extend the concept to other objects such as PGconns. + * + * See also the libpq-be-fe-helpers.h file, which provides additional + * facilities built on top of this one. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/libpq/libpq-be-fe.h + * + *------------------------------------------------------------------------- + */ +#ifndef LIBPQ_BE_FE_H +#define LIBPQ_BE_FE_H + +/* + * Despite the name, BUILDING_DLL is set only when building code directly part + * of the backend. Which also is where libpq isn't allowed to be + * used. Obviously this doesn't protect against libpq-fe.h getting included + * otherwise, but perhaps still protects against a few mistakes... + */ +#ifdef BUILDING_DLL +#error "libpq may not be used in code directly built into the backend" +#endif + +#include "libpq-fe.h" + +/* + * Memory-context-safe wrapper object for a PGresult. 
+ */ +typedef struct libpqsrv_PGresult +{ + PGresult *res; /* the wrapped PGresult */ + MemoryContext ctx; /* the MemoryContext it's attached to */ + MemoryContextCallback cb; /* the callback that implements freeing */ +} libpqsrv_PGresult; + + +/* + * Wrap the given PGresult in a libpqsrv_PGresult object, so that it will + * go away automatically if the current memory context is reset or deleted. + * + * To avoid potential memory leaks, backend code must always apply this + * immediately to the output of any PGresult-yielding libpq function. + */ +static inline libpqsrv_PGresult * +libpqsrv_PQwrap(PGresult *res) +{ + libpqsrv_PGresult *bres; + MemoryContext ctx = CurrentMemoryContext; + + /* We pass through a NULL result as-is, since there's nothing to free */ + if (res == NULL) + return NULL; + /* Attempt to allocate the wrapper ... this had better not throw error */ + bres = (libpqsrv_PGresult *) + MemoryContextAllocExtended(ctx, + sizeof(libpqsrv_PGresult), + MCXT_ALLOC_NO_OOM); + /* If we failed to allocate a wrapper, free the PGresult before failing */ + if (bres == NULL) + { + PQclear(res); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + } + /* Okay, set up the wrapper */ + bres->res = res; + bres->ctx = ctx; + bres->cb.func = (MemoryContextCallbackFunction) PQclear; + bres->cb.arg = res; + MemoryContextRegisterResetCallback(ctx, &bres->cb); + return bres; +} + +/* + * Free a wrapped PGresult, after detaching it from the memory context. + * Like PQclear(), allow the argument to be NULL. + */ +static inline void +libpqsrv_PQclear(libpqsrv_PGresult *bres) +{ + if (bres) + { + MemoryContextUnregisterResetCallback(bres->ctx, &bres->cb); + PQclear(bres->res); + pfree(bres); + } +} + +/* + * Move a wrapped PGresult to have a different parent context. + */ +static inline libpqsrv_PGresult * +libpqsrv_PGresultSetParent(libpqsrv_PGresult *bres, MemoryContext ctx) +{ + libpqsrv_PGresult *newres; + + /* We pass through a NULL result as-is */ + if (bres == NULL) + return NULL; + /* Make a new wrapper in the target context, raising error on OOM */ + newres = (libpqsrv_PGresult *) + MemoryContextAlloc(ctx, sizeof(libpqsrv_PGresult)); + /* Okay, set up the new wrapper */ + newres->res = bres->res; + newres->ctx = ctx; + newres->cb.func = (MemoryContextCallbackFunction) PQclear; + newres->cb.arg = bres->res; + MemoryContextRegisterResetCallback(ctx, &newres->cb); + /* Disarm and delete the old wrapper */ + MemoryContextUnregisterResetCallback(bres->ctx, &bres->cb); + pfree(bres); + return newres; +} + +/* + * Convenience wrapper for PQgetResult. + * + * We could supply wrappers for other PGresult-returning functions too, + * but at present there's no need. + */ +static inline libpqsrv_PGresult * +libpqsrv_PQgetResult(PGconn *conn) +{ + return libpqsrv_PQwrap(PQgetResult(conn)); +} + +/* + * Accessor functions for libpqsrv_PGresult. While it's not necessary to use + * these, they emulate the behavior of the underlying libpq functions when + * passed a NULL pointer. This is particularly important for PQresultStatus, + * which is often the first check on a result. 
+ */ + +static inline ExecStatusType +libpqsrv_PQresultStatus(const libpqsrv_PGresult *res) +{ + if (!res) + return PGRES_FATAL_ERROR; + return PQresultStatus(res->res); +} + +static inline const char * +libpqsrv_PQresultErrorMessage(const libpqsrv_PGresult *res) +{ + if (!res) + return ""; + return PQresultErrorMessage(res->res); +} + +static inline char * +libpqsrv_PQresultErrorField(const libpqsrv_PGresult *res, int fieldcode) +{ + if (!res) + return NULL; + return PQresultErrorField(res->res, fieldcode); +} + +static inline char * +libpqsrv_PQcmdStatus(const libpqsrv_PGresult *res) +{ + if (!res) + return NULL; + return PQcmdStatus(res->res); +} + +static inline int +libpqsrv_PQntuples(const libpqsrv_PGresult *res) +{ + if (!res) + return 0; + return PQntuples(res->res); +} + +static inline int +libpqsrv_PQnfields(const libpqsrv_PGresult *res) +{ + if (!res) + return 0; + return PQnfields(res->res); +} + +static inline char * +libpqsrv_PQgetvalue(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return NULL; + return PQgetvalue(res->res, tup_num, field_num); +} + +static inline int +libpqsrv_PQgetlength(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return 0; + return PQgetlength(res->res, tup_num, field_num); +} + +static inline int +libpqsrv_PQgetisnull(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return 1; /* pretend it is null */ + return PQgetisnull(res->res, tup_num, field_num); +} + +static inline char * +libpqsrv_PQfname(const libpqsrv_PGresult *res, int field_num) +{ + if (!res) + return NULL; + return PQfname(res->res, field_num); +} + +static inline const char * +libpqsrv_PQcmdTuples(const libpqsrv_PGresult *res) +{ + if (!res) + return ""; + return PQcmdTuples(res->res); +} + +/* + * Redefine these libpq entry point names concerned with PGresults so that + * they will operate on libpqsrv_PGresults instead. This avoids needing to + * convert a lot of pre-existing code, and reduces the notational differences + * between frontend and backend libpq-using code. 
+ */ +#define PGresult libpqsrv_PGresult +#define PQclear libpqsrv_PQclear +#define PQgetResult libpqsrv_PQgetResult +#define PQresultStatus libpqsrv_PQresultStatus +#define PQresultErrorMessage libpqsrv_PQresultErrorMessage +#define PQresultErrorField libpqsrv_PQresultErrorField +#define PQcmdStatus libpqsrv_PQcmdStatus +#define PQntuples libpqsrv_PQntuples +#define PQnfields libpqsrv_PQnfields +#define PQgetvalue libpqsrv_PQgetvalue +#define PQgetlength libpqsrv_PQgetlength +#define PQgetisnull libpqsrv_PQgetisnull +#define PQfname libpqsrv_PQfname +#define PQcmdTuples libpqsrv_PQcmdTuples + +#endif /* LIBPQ_BE_FE_H */ diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h index e1b42267b22aa..039b9cba61a32 100644 --- a/src/include/utils/palloc.h +++ b/src/include/utils/palloc.h @@ -133,6 +133,8 @@ MemoryContextSwitchTo(MemoryContext context) /* Registration of memory context reset/delete callbacks */ extern void MemoryContextRegisterResetCallback(MemoryContext context, MemoryContextCallback *cb); +extern void MemoryContextUnregisterResetCallback(MemoryContext context, + MemoryContextCallback *cb); /* * These are like standard strdup() except the copied string is diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 4353befab9934..3daba26b23723 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3757,6 +3757,7 @@ leafSegmentInfo leaf_item libpq_gettext_func libpq_source +libpqsrv_PGresult line_t lineno_t list_sort_comparator From 80aa9848befc13c188d2775a859deaf172fdd3a2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:31:43 -0400 Subject: [PATCH 15/67] Reap the benefits of not having to avoid leaking PGresults. Remove a bunch of PG_TRY constructs, de-volatilize related variables, remove some PQclear calls in error paths. Aside from making the code simpler and shorter, this should provide some marginal performance gains. For ease of review, I did not re-indent code within the removed PG_TRY constructs. That'll be done in a separate patch. 
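Schematically, the conversion this enables looks like the following (a sketch with hypothetical helper names, not code lifted from the diffs; pgfdw_report_error loses its "clear" argument because memory context cleanup now releases the result after ereport(ERROR)):

    /* Before: a PG_TRY block was needed to avoid leaking the PGresult */
    static void
    exec_remote_command_old(PGconn *conn, const char *sql)
    {
        PGresult   *volatile res = NULL;

        PG_TRY();
        {
            res = pgfdw_exec_query(conn, sql, NULL);
            if (PQresultStatus(res) != PGRES_COMMAND_OK)
                pgfdw_report_error(ERROR, res, conn, false, sql);
        }
        PG_FINALLY();
        {
            PQclear(res);
        }
        PG_END_TRY();
    }

    /* After: no PG_TRY, no volatile; error exit frees the wrapped result */
    static void
    exec_remote_command_new(PGconn *conn, const char *sql)
    {
        PGresult   *res = pgfdw_exec_query(conn, sql, NULL);

        if (PQresultStatus(res) != PGRES_COMMAND_OK)
            pgfdw_report_error(ERROR, res, conn, sql);
        PQclear(res);
    }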
Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/dblink/dblink.c | 96 +++-------- contrib/postgres_fdw/connection.c | 52 ++---- contrib/postgres_fdw/postgres_fdw.c | 163 ++++-------------- contrib/postgres_fdw/postgres_fdw.h | 2 +- .../libpqwalreceiver/libpqwalreceiver.c | 31 +--- src/include/libpq/libpq-be-fe-helpers.h | 13 +- 6 files changed, 83 insertions(+), 274 deletions(-) diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index de5bed282f3f0..fc423c0544d3f 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -101,8 +101,8 @@ static void materializeQueryResult(FunctionCallInfo fcinfo, const char *conname, const char *sql, bool fail); -static PGresult *storeQueryResult(volatile storeInfo *sinfo, PGconn *conn, const char *sql); -static void storeRow(volatile storeInfo *sinfo, PGresult *res, bool first); +static PGresult *storeQueryResult(storeInfo *sinfo, PGconn *conn, const char *sql); +static void storeRow(storeInfo *sinfo, PGresult *res, bool first); static remoteConn *getConnectionByName(const char *name); static HTAB *createConnHash(void); static remoteConn *createNewConnection(const char *name); @@ -169,14 +169,6 @@ typedef struct remoteConnHashEnt /* initial number of connection hashes */ #define NUMCONN 16 -static char * -xpstrdup(const char *in) -{ - if (in == NULL) - return NULL; - return pstrdup(in); -} - pg_noreturn static void dblink_res_internalerror(PGconn *conn, PGresult *res, const char *p2) { @@ -870,17 +862,14 @@ static void materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res) { ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + bool is_sql_cmd; + int ntuples; + int nfields; /* prepTuplestoreResult must have been called previously */ Assert(rsinfo->returnMode == SFRM_Materialize); - PG_TRY(); - { - TupleDesc tupdesc; - bool is_sql_cmd; - int ntuples; - int nfields; - if (PQresultStatus(res) == PGRES_COMMAND_OK) { is_sql_cmd = true; @@ -988,13 +977,8 @@ materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res) /* clean up GUC settings, if we changed any */ restoreLocalGucs(nestlevel); } - } - PG_FINALLY(); - { - /* be sure to release the libpq result */ + PQclear(res); - } - PG_END_TRY(); } /* @@ -1013,16 +997,17 @@ materializeQueryResult(FunctionCallInfo fcinfo, bool fail) { ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; - PGresult *volatile res = NULL; - volatile storeInfo sinfo = {0}; /* prepTuplestoreResult must have been called previously */ Assert(rsinfo->returnMode == SFRM_Materialize); - sinfo.fcinfo = fcinfo; - + /* Use a PG_TRY block to ensure we pump libpq dry of results */ PG_TRY(); { + storeInfo sinfo = {0}; + PGresult *res; + + sinfo.fcinfo = fcinfo; /* Create short-lived memory context for data conversions */ sinfo.tmpcontext = AllocSetContextCreate(CurrentMemoryContext, "dblink temporary context", @@ -1035,14 +1020,7 @@ materializeQueryResult(FunctionCallInfo fcinfo, (PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK)) { - /* - * dblink_res_error will clear the passed PGresult, so we need - * this ugly dance to avoid doing so twice during error exit - */ - PGresult *res1 = res; - - res = NULL; - dblink_res_error(conn, conname, res1, fail, + dblink_res_error(conn, conname, res, fail, "while executing query"); /* if fail isn't set, we'll return an empty query result */ } @@ -1081,7 +1059,6 @@ materializeQueryResult(FunctionCallInfo fcinfo, 
tuplestore_puttuple(tupstore, tuple); PQclear(res); - res = NULL; } else { @@ -1090,26 +1067,20 @@ materializeQueryResult(FunctionCallInfo fcinfo, Assert(rsinfo->setResult != NULL); PQclear(res); - res = NULL; } /* clean up data conversion short-lived memory context */ if (sinfo.tmpcontext != NULL) MemoryContextDelete(sinfo.tmpcontext); - sinfo.tmpcontext = NULL; PQclear(sinfo.last_res); - sinfo.last_res = NULL; PQclear(sinfo.cur_res); - sinfo.cur_res = NULL; } PG_CATCH(); { - /* be sure to release any libpq result we collected */ - PQclear(res); - PQclear(sinfo.last_res); - PQclear(sinfo.cur_res); - /* and clear out any pending data in libpq */ + PGresult *res; + + /* be sure to clear out any pending data in libpq */ while ((res = libpqsrv_get_result(conn, dblink_we_get_result)) != NULL) PQclear(res); @@ -1122,7 +1093,7 @@ materializeQueryResult(FunctionCallInfo fcinfo, * Execute query, and send any result rows to sinfo->tuplestore. */ static PGresult * -storeQueryResult(volatile storeInfo *sinfo, PGconn *conn, const char *sql) +storeQueryResult(storeInfo *sinfo, PGconn *conn, const char *sql) { bool first = true; int nestlevel = -1; @@ -1190,7 +1161,7 @@ storeQueryResult(volatile storeInfo *sinfo, PGconn *conn, const char *sql) * (in this case the PGresult might contain either zero or one row). */ static void -storeRow(volatile storeInfo *sinfo, PGresult *res, bool first) +storeRow(storeInfo *sinfo, PGresult *res, bool first) { int nfields = PQnfields(res); HeapTuple tuple; @@ -2795,10 +2766,13 @@ dblink_connstr_check(const char *connstr) /* * Report an error received from the remote server * - * res: the received error result (will be freed) + * res: the received error result * fail: true for ERROR ereport, false for NOTICE * fmt and following args: sprintf-style format and values for errcontext; * the resulting string should be worded like "while " + * + * If "res" is not NULL, it'll be PQclear'ed here (unless we throw error, + * in which case memory context cleanup will clear it eventually). */ static void dblink_res_error(PGconn *conn, const char *conname, PGresult *res, @@ -2806,15 +2780,11 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res, { int level; char *pg_diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); - char *pg_diag_message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); - char *pg_diag_message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); - char *pg_diag_message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT); - char *pg_diag_context = PQresultErrorField(res, PG_DIAG_CONTEXT); + char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); + char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); + char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT); + char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT); int sqlstate; - char *message_primary; - char *message_detail; - char *message_hint; - char *message_context; va_list ap; char dblink_context_msg[512]; @@ -2832,11 +2802,6 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res, else sqlstate = ERRCODE_CONNECTION_FAILURE; - message_primary = xpstrdup(pg_diag_message_primary); - message_detail = xpstrdup(pg_diag_message_detail); - message_hint = xpstrdup(pg_diag_message_hint); - message_context = xpstrdup(pg_diag_context); - /* * If we don't get a message from the PGresult, try the PGconn. 
This is * needed because for connection-level failures, PQgetResult may just @@ -2845,14 +2810,6 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res, if (message_primary == NULL) message_primary = pchomp(PQerrorMessage(conn)); - /* - * Now that we've copied all the data we need out of the PGresult, it's - * safe to free it. We must do this to avoid PGresult leakage. We're - * leaking all the strings too, but those are in palloc'd memory that will - * get cleaned up eventually. - */ - PQclear(res); - /* * Format the basic errcontext string. Below, we'll add on something * about the connection name. That's a violation of the translatability @@ -2877,6 +2834,7 @@ dblink_res_error(PGconn *conn, const char *conname, PGresult *res, dblink_context_msg, conname)) : (errcontext("%s on unnamed dblink connection", dblink_context_msg)))); + PQclear(res); } /* diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index c1ce6f3343665..c654c1a1ff0a7 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -815,7 +815,7 @@ static void do_sql_command_begin(PGconn *conn, const char *sql) { if (!PQsendQuery(conn, sql)) - pgfdw_report_error(ERROR, NULL, conn, false, sql); + pgfdw_report_error(ERROR, NULL, conn, sql); } static void @@ -830,10 +830,10 @@ do_sql_command_end(PGconn *conn, const char *sql, bool consume_input) * would be large compared to the overhead of PQconsumeInput.) */ if (consume_input && !PQconsumeInput(conn)) - pgfdw_report_error(ERROR, NULL, conn, false, sql); + pgfdw_report_error(ERROR, NULL, conn, sql); res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, true, sql); + pgfdw_report_error(ERROR, res, conn, sql); PQclear(res); } @@ -967,22 +967,21 @@ pgfdw_get_result(PGconn *conn) * Report an error we got from the remote server. * * elevel: error level to use (typically ERROR, but might be less) - * res: PGresult containing the error + * res: PGresult containing the error (might be NULL) * conn: connection we did the query on - * clear: if true, PQclear the result (otherwise caller will handle it) * sql: NULL, or text of remote command we tried to execute * + * If "res" is not NULL, it'll be PQclear'ed here (unless we throw error, + * in which case memory context cleanup will clear it eventually). + * * Note: callers that choose not to throw ERROR for a remote error are * responsible for making sure that the associated ConnCacheEntry gets * marked with have_error = true. */ void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - bool clear, const char *sql) + const char *sql) { - /* If requested, PGresult must be released before leaving this function. */ - PG_TRY(); - { char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); @@ -1016,13 +1015,7 @@ pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, message_hint ? errhint("%s", message_hint) : 0, message_context ? errcontext("%s", message_context) : 0, sql ? 
errcontext("remote SQL command: %s", sql) : 0)); - } - PG_FINALLY(); - { - if (clear) PQclear(res); - } - PG_END_TRY(); } /* @@ -1545,7 +1538,7 @@ pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query) */ if (!PQsendQuery(conn, query)) { - pgfdw_report_error(WARNING, NULL, conn, false, query); + pgfdw_report_error(WARNING, NULL, conn, query); return false; } @@ -1570,7 +1563,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, */ if (consume_input && !PQconsumeInput(conn)) { - pgfdw_report_error(WARNING, NULL, conn, false, query); + pgfdw_report_error(WARNING, NULL, conn, query); return false; } @@ -1582,7 +1575,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, (errmsg("could not get query result due to timeout"), errcontext("remote SQL command: %s", query))); else - pgfdw_report_error(WARNING, NULL, conn, false, query); + pgfdw_report_error(WARNING, NULL, conn, query); return false; } @@ -1590,7 +1583,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, /* Issue a warning if not successful. */ if (PQresultStatus(result) != PGRES_COMMAND_OK) { - pgfdw_report_error(WARNING, result, conn, true, query); + pgfdw_report_error(WARNING, result, conn, query); return ignore_errors; } PQclear(result); @@ -1618,17 +1611,12 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result, bool *timed_out) { - volatile bool failed = false; - PGresult *volatile last_res = NULL; + bool failed = false; + PGresult *last_res = NULL; + int canceldelta = RETRY_CANCEL_TIMEOUT * 2; *result = NULL; *timed_out = false; - - /* In what follows, do not leak any PGresults on an error. */ - PG_TRY(); - { - int canceldelta = RETRY_CANCEL_TIMEOUT * 2; - for (;;) { PGresult *res; @@ -1706,15 +1694,7 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PQclear(last_res); last_res = res; } -exit: ; - } - PG_CATCH(); - { - PQclear(last_res); - PG_RE_THROW(); - } - PG_END_TRY(); - +exit: if (failed) PQclear(last_res); else diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 3a84d06cfd1f7..f2dee7b1c69ab 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -1702,13 +1702,9 @@ postgresReScanForeignScan(ForeignScanState *node) return; } - /* - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. - */ res = pgfdw_exec_query(fsstate->conn, sql, fsstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fsstate->conn, true, sql); + pgfdw_report_error(ERROR, res, fsstate->conn, sql); PQclear(res); /* Now force a fresh FETCH. */ @@ -3608,11 +3604,7 @@ get_remote_estimate(const char *sql, PGconn *conn, double *rows, int *width, Cost *startup_cost, Cost *total_cost) { - PGresult *volatile res = NULL; - - /* PGresult must be released before leaving this function. */ - PG_TRY(); - { + PGresult *res; char *line; char *p; int n; @@ -3622,7 +3614,7 @@ get_remote_estimate(const char *sql, PGconn *conn, */ res = pgfdw_exec_query(conn, sql, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql); + pgfdw_report_error(ERROR, res, conn, sql); /* * Extract cost numbers for topmost plan node. 
Note we search for a @@ -3637,12 +3629,7 @@ get_remote_estimate(const char *sql, PGconn *conn, startup_cost, total_cost, rows, width); if (n != 4) elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); - } - PG_FINALLY(); - { PQclear(res); - } - PG_END_TRY(); } /* @@ -3782,17 +3769,14 @@ create_cursor(ForeignScanState *node) */ if (!PQsendQueryParams(conn, buf.data, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, conn, false, buf.data); + pgfdw_report_error(ERROR, NULL, conn, buf.data); /* * Get the result, and check for success. - * - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. */ res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, true, fsstate->query); + pgfdw_report_error(ERROR, res, conn, fsstate->query); PQclear(res); /* Mark the cursor as created, and show no tuples have been retrieved */ @@ -3814,7 +3798,10 @@ static void fetch_more_data(ForeignScanState *node) { PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; - PGresult *volatile res = NULL; + PGconn *conn = fsstate->conn; + PGresult *res; + int numrows; + int i; MemoryContext oldcontext; /* @@ -3825,13 +3812,6 @@ fetch_more_data(ForeignScanState *node) MemoryContextReset(fsstate->batch_cxt); oldcontext = MemoryContextSwitchTo(fsstate->batch_cxt); - /* PGresult must be released before leaving this function. */ - PG_TRY(); - { - PGconn *conn = fsstate->conn; - int numrows; - int i; - if (fsstate->async_capable) { Assert(fsstate->conn_state->pendingAreq); @@ -3843,7 +3823,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_get_result(conn); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, fsstate->query); + pgfdw_report_error(ERROR, res, conn, fsstate->query); /* Reset per-connection state */ fsstate->conn_state->pendingAreq = NULL; @@ -3859,7 +3839,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_exec_query(conn, sql, fsstate->conn_state); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, fsstate->query); + pgfdw_report_error(ERROR, res, conn, fsstate->query); } /* Convert the data into HeapTuples */ @@ -3887,12 +3867,8 @@ fetch_more_data(ForeignScanState *node) /* Must be EOF if we didn't get as many tuples as we asked for. */ fsstate->eof_reached = (numrows < fsstate->fetch_size); - } - PG_FINALLY(); - { + PQclear(res); - } - PG_END_TRY(); MemoryContextSwitchTo(oldcontext); } @@ -3966,14 +3942,9 @@ close_cursor(PGconn *conn, unsigned int cursor_number, PGresult *res; snprintf(sql, sizeof(sql), "CLOSE c%u", cursor_number); - - /* - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. - */ res = pgfdw_exec_query(conn, sql, conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, true, sql); + pgfdw_report_error(ERROR, res, conn, sql); PQclear(res); } @@ -4181,18 +4152,15 @@ execute_foreign_modify(EState *estate, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query); + pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. - * - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. 
*/ res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != (fmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query); + pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); /* Check number of rows affected, and fetch RETURNING tuple if any */ if (fmstate->has_returning) @@ -4251,17 +4219,14 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) fmstate->query, 0, NULL)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query); + pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. - * - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. */ res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query); + pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); PQclear(res); /* This action shows that the prepare has been done. */ @@ -4352,16 +4317,11 @@ convert_prep_stmt_params(PgFdwModifyState *fmstate, /* * store_returning_result * Store the result of a RETURNING clause - * - * On error, be sure to release the PGresult on the way out. Callers do not - * have PG_TRY blocks to ensure this happens. */ static void store_returning_result(PgFdwModifyState *fmstate, TupleTableSlot *slot, PGresult *res) { - PG_TRY(); - { HeapTuple newtup; newtup = make_tuple_from_result_row(res, 0, @@ -4376,13 +4336,6 @@ store_returning_result(PgFdwModifyState *fmstate, * heaptuples directly, so allow for conversion. */ ExecForceStoreHeapTuple(newtup, slot, true); - } - PG_CATCH(); - { - PQclear(res); - PG_RE_THROW(); - } - PG_END_TRY(); } /* @@ -4418,14 +4371,9 @@ deallocate_query(PgFdwModifyState *fmstate) return; snprintf(sql, sizeof(sql), "DEALLOCATE %s", fmstate->p_name); - - /* - * We don't use a PG_TRY block here, so be careful not to throw error - * without releasing the PGresult. - */ res = pgfdw_exec_query(fmstate->conn, sql, fmstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, true, sql); + pgfdw_report_error(ERROR, res, fmstate->conn, sql); PQclear(res); pfree(fmstate->p_name); fmstate->p_name = NULL; @@ -4593,7 +4541,7 @@ execute_dml_stmt(ForeignScanState *node) */ if (!PQsendQueryParams(dmstate->conn, dmstate->query, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, dmstate->conn, false, dmstate->query); + pgfdw_report_error(ERROR, NULL, dmstate->conn, dmstate->query); /* * Get the result, and check for success. @@ -4601,7 +4549,7 @@ execute_dml_stmt(ForeignScanState *node) dmstate->result = pgfdw_get_result(dmstate->conn); if (PQresultStatus(dmstate->result) != (dmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, true, + pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, dmstate->query); /* @@ -4947,7 +4895,7 @@ postgresAnalyzeForeignTable(Relation relation, UserMapping *user; PGconn *conn; StringInfoData sql; - PGresult *volatile res = NULL; + PGresult *res; /* Return the row-analysis function pointer */ *func = postgresAcquireSampleRowsFunc; @@ -4973,22 +4921,14 @@ postgresAnalyzeForeignTable(Relation relation, initStringInfo(&sql); deparseAnalyzeSizeSql(&sql, relation); - /* In what follows, do not risk leaking any PGresults. 
*/ - PG_TRY(); - { res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 1) elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); *totalpages = strtoul(PQgetvalue(res, 0, 0), NULL, 10); - } - PG_FINALLY(); - { PQclear(res); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -5009,9 +4949,9 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) UserMapping *user; PGconn *conn; StringInfoData sql; - PGresult *volatile res = NULL; - volatile double reltuples = -1; - volatile char relkind = 0; + PGresult *res; + double reltuples; + char relkind; /* assume the remote relation does not support TABLESAMPLE */ *can_tablesample = false; @@ -5030,24 +4970,15 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) initStringInfo(&sql); deparseAnalyzeInfoSql(&sql, relation); - /* In what follows, do not risk leaking any PGresults. */ - PG_TRY(); - { res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 2) elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); reltuples = strtod(PQgetvalue(res, 0, 0), NULL); relkind = *(PQgetvalue(res, 0, 1)); - } - PG_FINALLY(); - { - if (res) PQclear(res); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -5090,7 +5021,9 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, double reltuples; unsigned int cursor_number; StringInfoData sql; - PGresult *volatile res = NULL; + PGresult *res; + char fetch_sql[64]; + int fetch_size; ListCell *lc; /* Initialize workspace state */ @@ -5267,17 +5200,10 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, deparseAnalyzeSql(&sql, relation, method, sample_frac, &astate.retrieved_attrs); - /* In what follows, do not risk leaking any PGresults. */ - PG_TRY(); - { - char fetch_sql[64]; - int fetch_size; - res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); PQclear(res); - res = NULL; /* * Determine the fetch size. The default is arbitrary, but shouldn't @@ -5328,7 +5254,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, res = pgfdw_exec_query(conn, fetch_sql, NULL); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, sql.data); + pgfdw_report_error(ERROR, res, conn, sql.data); /* Process whatever we got. */ numrows = PQntuples(res); @@ -5336,7 +5262,6 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, analyze_row_processor(res, i, &astate); PQclear(res); - res = NULL; /* Must be EOF if we didn't get all the rows requested. */ if (numrows < fetch_size) @@ -5345,13 +5270,6 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, /* Close the cursor, just to be tidy. 
*/ close_cursor(conn, cursor_number, NULL); - } - PG_CATCH(); - { - PQclear(res); - PG_RE_THROW(); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -5463,7 +5381,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) UserMapping *mapping; PGconn *conn; StringInfoData buf; - PGresult *volatile res = NULL; + PGresult *res; int numrows, i; ListCell *lc; @@ -5502,16 +5420,13 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Create workspace for strings */ initStringInfo(&buf); - /* In what follows, do not risk leaking any PGresults. */ - PG_TRY(); - { /* Check that the schema really exists */ appendStringInfoString(&buf, "SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = "); deparseStringLiteral(&buf, stmt->remote_schema); res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, buf.data); + pgfdw_report_error(ERROR, res, conn, buf.data); if (PQntuples(res) != 1) ereport(ERROR, @@ -5520,7 +5435,6 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) stmt->remote_schema, server->servername))); PQclear(res); - res = NULL; resetStringInfo(&buf); /* @@ -5628,7 +5542,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Fetch the data */ res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, false, buf.data); + pgfdw_report_error(ERROR, res, conn, buf.data); /* Process results */ numrows = PQntuples(res); @@ -5733,12 +5647,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) commands = lappend(commands, pstrdup(buf.data)); } - } - PG_FINALLY(); - { PQclear(res); - } - PG_END_TRY(); ReleaseConnection(conn); @@ -7406,7 +7315,7 @@ postgresForeignAsyncNotify(AsyncRequest *areq) /* On error, report the original query, not the FETCH. 
*/ if (!PQconsumeInput(fsstate->conn)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, false, fsstate->query); + pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); fetch_more_data(node); @@ -7505,7 +7414,7 @@ fetch_more_data_begin(AsyncRequest *areq) fsstate->fetch_size, fsstate->cursor_number); if (!PQsendQuery(fsstate->conn, sql)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, false, fsstate->query); + pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); /* Remember that the request is in process */ fsstate->conn_state->pendingAreq = areq; diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 9cb4ee84139ea..38e1a88594131 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -167,7 +167,7 @@ extern PGresult *pgfdw_get_result(PGconn *conn); extern PGresult *pgfdw_exec_query(PGconn *conn, const char *query, PgFdwConnState *state); extern void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - bool clear, const char *sql); + const char *sql); /* in option.c */ extern int ExtractConnectionOptions(List *defelems, diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 886d99951dddf..239641bfbb66a 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -421,31 +421,22 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli) "IDENTIFY_SYSTEM", WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE); if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not receive database system identifier and timeline ID from " "the primary server: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } /* * IDENTIFY_SYSTEM returns 3 columns in 9.3 and earlier, and 4 columns in * 9.4 and onwards. 
*/ if (PQnfields(res) < 3 || PQntuples(res) != 1) - { - int ntuples = PQntuples(res); - int nfields = PQnfields(res); - - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid response from primary server"), errdetail("Could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields.", - ntuples, nfields, 1, 3))); - } + PQntuples(res), PQnfields(res), 1, 3))); primary_sysid = pstrdup(PQgetvalue(res, 0, 0)); *primary_tli = pg_strtoint32(PQgetvalue(res, 0, 1)); PQclear(res); @@ -607,13 +598,10 @@ libpqrcv_startstreaming(WalReceiverConn *conn, return false; } else if (PQresultStatus(res) != PGRES_COPY_BOTH) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not start WAL streaming: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } PQclear(res); return true; } @@ -721,26 +709,17 @@ libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn, cmd, WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE); if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not receive timeline history file from " "the primary server: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } if (PQnfields(res) != 2 || PQntuples(res) != 1) - { - int ntuples = PQntuples(res); - int nfields = PQnfields(res); - - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid response from primary server"), errdetail("Expected 1 tuple with 2 fields, got %d tuples with %d fields.", - ntuples, nfields))); - } + PQntuples(res), PQnfields(res)))); *filename = pstrdup(PQgetvalue(res, 0, 0)); *len = PQgetlength(res, 0, 1); @@ -844,13 +823,10 @@ libpqrcv_receive(WalReceiverConn *conn, char **buffer, return -1; } else - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not receive data from WAL stream: %s", pchomp(PQerrorMessage(conn->streamConn))))); - } } if (rawlen < -1) ereport(ERROR, @@ -974,13 +950,10 @@ libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname, pfree(cmd.data); if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - PQclear(res); ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("could not create replication slot \"%s\": %s", slotname, pchomp(PQerrorMessage(conn->streamConn))))); - } if (lsn) *lsn = DatumGetLSN(DirectFunctionCall1Coll(pg_lsn_in, InvalidOid, diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index 8d12a331497f8..4ba635aa96f92 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -279,11 +279,8 @@ libpqsrv_exec_params(PGconn *conn, static inline PGresult * libpqsrv_get_result_last(PGconn *conn, uint32 wait_event_info) { - PGresult *volatile lastResult = NULL; + PGresult *lastResult = NULL; - /* In what follows, do not leak any PGresults on an error. */ - PG_TRY(); - { for (;;) { /* Wait for, and collect, the next PGresult. */ @@ -306,14 +303,6 @@ libpqsrv_get_result_last(PGconn *conn, uint32 wait_event_info) PQstatus(conn) == CONNECTION_BAD) break; } - } - PG_CATCH(); - { - PQclear(lastResult); - PG_RE_THROW(); - } - PG_END_TRY(); - return lastResult; } From 73873805fb3627cb23937c750fa83ffd8f16fc6c Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:36:44 -0400 Subject: [PATCH 16/67] Run pgindent on the changes of the previous patch. This step can be checked mechanically. 
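One way to verify this mechanically (assuming a configured source tree with pg_bsd_indent available) is to re-run pgindent over the touched files and confirm the working tree is unchanged:

    src/tools/pgindent/pgindent \
        contrib/dblink/dblink.c \
        contrib/postgres_fdw/connection.c \
        contrib/postgres_fdw/postgres_fdw.c \
        src/include/libpq/libpq-be-fe-helpers.h
    git diff --exit-code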
Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/dblink/dblink.c | 180 +++--- contrib/postgres_fdw/connection.c | 188 +++---- contrib/postgres_fdw/postgres_fdw.c | 703 ++++++++++++------------ src/include/libpq/libpq-be-fe-helpers.h | 38 +- 4 files changed, 553 insertions(+), 556 deletions(-) diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c index fc423c0544d3f..f98805fb5f735 100644 --- a/contrib/dblink/dblink.c +++ b/contrib/dblink/dblink.c @@ -870,115 +870,115 @@ materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res) /* prepTuplestoreResult must have been called previously */ Assert(rsinfo->returnMode == SFRM_Materialize); - if (PQresultStatus(res) == PGRES_COMMAND_OK) - { - is_sql_cmd = true; - - /* - * need a tuple descriptor representing one TEXT column to return - * the command status string as our result tuple - */ - tupdesc = CreateTemplateTupleDesc(1); - TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status", - TEXTOID, -1, 0); - ntuples = 1; - nfields = 1; - } - else - { - Assert(PQresultStatus(res) == PGRES_TUPLES_OK); + if (PQresultStatus(res) == PGRES_COMMAND_OK) + { + is_sql_cmd = true; - is_sql_cmd = false; + /* + * need a tuple descriptor representing one TEXT column to return the + * command status string as our result tuple + */ + tupdesc = CreateTemplateTupleDesc(1); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status", + TEXTOID, -1, 0); + ntuples = 1; + nfields = 1; + } + else + { + Assert(PQresultStatus(res) == PGRES_TUPLES_OK); - /* get a tuple descriptor for our result type */ - switch (get_call_result_type(fcinfo, NULL, &tupdesc)) - { - case TYPEFUNC_COMPOSITE: - /* success */ - break; - case TYPEFUNC_RECORD: - /* failed to determine actual type of RECORD */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - break; - default: - /* result type isn't composite */ - elog(ERROR, "return type must be a row type"); - break; - } + is_sql_cmd = false; - /* make sure we have a persistent copy of the tupdesc */ - tupdesc = CreateTupleDescCopy(tupdesc); - ntuples = PQntuples(res); - nfields = PQnfields(res); + /* get a tuple descriptor for our result type */ + switch (get_call_result_type(fcinfo, NULL, &tupdesc)) + { + case TYPEFUNC_COMPOSITE: + /* success */ + break; + case TYPEFUNC_RECORD: + /* failed to determine actual type of RECORD */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + break; + default: + /* result type isn't composite */ + elog(ERROR, "return type must be a row type"); + break; } - /* - * check result and tuple descriptor have the same number of columns - */ - if (nfields != tupdesc->natts) - ereport(ERROR, - (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("remote query result rowtype does not match " - "the specified FROM clause rowtype"))); + /* make sure we have a persistent copy of the tupdesc */ + tupdesc = CreateTupleDescCopy(tupdesc); + ntuples = PQntuples(res); + nfields = PQnfields(res); + } - if (ntuples > 0) - { - AttInMetadata *attinmeta; - int nestlevel = -1; - Tuplestorestate *tupstore; - MemoryContext oldcontext; - int row; - char **values; + /* + * check result and tuple descriptor have the same number of columns + */ + if (nfields != tupdesc->natts) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("remote query 
result rowtype does not match " + "the specified FROM clause rowtype"))); - attinmeta = TupleDescGetAttInMetadata(tupdesc); + if (ntuples > 0) + { + AttInMetadata *attinmeta; + int nestlevel = -1; + Tuplestorestate *tupstore; + MemoryContext oldcontext; + int row; + char **values; - /* Set GUCs to ensure we read GUC-sensitive data types correctly */ - if (!is_sql_cmd) - nestlevel = applyRemoteGucs(conn); + attinmeta = TupleDescGetAttInMetadata(tupdesc); - oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); - tupstore = tuplestore_begin_heap(true, false, work_mem); - rsinfo->setResult = tupstore; - rsinfo->setDesc = tupdesc; - MemoryContextSwitchTo(oldcontext); + /* Set GUCs to ensure we read GUC-sensitive data types correctly */ + if (!is_sql_cmd) + nestlevel = applyRemoteGucs(conn); - values = palloc_array(char *, nfields); + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + MemoryContextSwitchTo(oldcontext); - /* put all tuples into the tuplestore */ - for (row = 0; row < ntuples; row++) + values = palloc_array(char *, nfields); + + /* put all tuples into the tuplestore */ + for (row = 0; row < ntuples; row++) + { + HeapTuple tuple; + + if (!is_sql_cmd) { - HeapTuple tuple; + int i; - if (!is_sql_cmd) - { - int i; - - for (i = 0; i < nfields; i++) - { - if (PQgetisnull(res, row, i)) - values[i] = NULL; - else - values[i] = PQgetvalue(res, row, i); - } - } - else + for (i = 0; i < nfields; i++) { - values[0] = PQcmdStatus(res); + if (PQgetisnull(res, row, i)) + values[i] = NULL; + else + values[i] = PQgetvalue(res, row, i); } - - /* build the tuple and put it into the tuplestore. */ - tuple = BuildTupleFromCStrings(attinmeta, values); - tuplestore_puttuple(tupstore, tuple); + } + else + { + values[0] = PQcmdStatus(res); } - /* clean up GUC settings, if we changed any */ - restoreLocalGucs(nestlevel); + /* build the tuple and put it into the tuplestore. 
*/ + tuple = BuildTupleFromCStrings(attinmeta, values); + tuplestore_puttuple(tupstore, tuple); } - PQclear(res); + /* clean up GUC settings, if we changed any */ + restoreLocalGucs(nestlevel); + } + + PQclear(res); } /* diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index c654c1a1ff0a7..a33843fcf8531 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -982,40 +982,40 @@ void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, const char *sql) { - char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); - char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); - char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); - char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT); - char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT); - int sqlstate; - - if (diag_sqlstate) - sqlstate = MAKE_SQLSTATE(diag_sqlstate[0], - diag_sqlstate[1], - diag_sqlstate[2], - diag_sqlstate[3], - diag_sqlstate[4]); - else - sqlstate = ERRCODE_CONNECTION_FAILURE; + char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); + char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); + char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL); + char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT); + char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT); + int sqlstate; + + if (diag_sqlstate) + sqlstate = MAKE_SQLSTATE(diag_sqlstate[0], + diag_sqlstate[1], + diag_sqlstate[2], + diag_sqlstate[3], + diag_sqlstate[4]); + else + sqlstate = ERRCODE_CONNECTION_FAILURE; - /* - * If we don't get a message from the PGresult, try the PGconn. This - * is needed because for connection-level failures, PQgetResult may - * just return NULL, not a PGresult at all. - */ - if (message_primary == NULL) - message_primary = pchomp(PQerrorMessage(conn)); - - ereport(elevel, - (errcode(sqlstate), - (message_primary != NULL && message_primary[0] != '\0') ? - errmsg_internal("%s", message_primary) : - errmsg("could not obtain message string for remote error"), - message_detail ? errdetail_internal("%s", message_detail) : 0, - message_hint ? errhint("%s", message_hint) : 0, - message_context ? errcontext("%s", message_context) : 0, - sql ? errcontext("remote SQL command: %s", sql) : 0)); - PQclear(res); + /* + * If we don't get a message from the PGresult, try the PGconn. This is + * needed because for connection-level failures, PQgetResult may just + * return NULL, not a PGresult at all. + */ + if (message_primary == NULL) + message_primary = pchomp(PQerrorMessage(conn)); + + ereport(elevel, + (errcode(sqlstate), + (message_primary != NULL && message_primary[0] != '\0') ? + errmsg_internal("%s", message_primary) : + errmsg("could not obtain message string for remote error"), + message_detail ? errdetail_internal("%s", message_detail) : 0, + message_hint ? errhint("%s", message_hint) : 0, + message_context ? errcontext("%s", message_context) : 0, + sql ? errcontext("remote SQL command: %s", sql) : 0)); + PQclear(res); } /* @@ -1617,83 +1617,83 @@ pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, *result = NULL; *timed_out = false; - for (;;) + for (;;) + { + PGresult *res; + + while (PQisBusy(conn)) { - PGresult *res; + int wc; + TimestampTz now = GetCurrentTimestamp(); + long cur_timeout; - while (PQisBusy(conn)) + /* If timeout has expired, give up. 
*/ + if (now >= endtime) { - int wc; - TimestampTz now = GetCurrentTimestamp(); - long cur_timeout; - - /* If timeout has expired, give up. */ - if (now >= endtime) - { - *timed_out = true; - failed = true; - goto exit; - } + *timed_out = true; + failed = true; + goto exit; + } - /* If we need to re-issue the cancel request, do that. */ - if (now >= retrycanceltime) - { - /* We ignore failure to issue the repeated request. */ - (void) libpqsrv_cancel(conn, endtime); + /* If we need to re-issue the cancel request, do that. */ + if (now >= retrycanceltime) + { + /* We ignore failure to issue the repeated request. */ + (void) libpqsrv_cancel(conn, endtime); - /* Recompute "now" in case that took measurable time. */ - now = GetCurrentTimestamp(); + /* Recompute "now" in case that took measurable time. */ + now = GetCurrentTimestamp(); - /* Adjust re-cancel timeout in increasing steps. */ - retrycanceltime = TimestampTzPlusMilliseconds(now, - canceldelta); - canceldelta += canceldelta; - } + /* Adjust re-cancel timeout in increasing steps. */ + retrycanceltime = TimestampTzPlusMilliseconds(now, + canceldelta); + canceldelta += canceldelta; + } - /* If timeout has expired, give up, else get sleep time. */ - cur_timeout = TimestampDifferenceMilliseconds(now, - Min(endtime, - retrycanceltime)); - if (cur_timeout <= 0) - { - *timed_out = true; - failed = true; - goto exit; - } + /* If timeout has expired, give up, else get sleep time. */ + cur_timeout = TimestampDifferenceMilliseconds(now, + Min(endtime, + retrycanceltime)); + if (cur_timeout <= 0) + { + *timed_out = true; + failed = true; + goto exit; + } - /* first time, allocate or get the custom wait event */ - if (pgfdw_we_cleanup_result == 0) - pgfdw_we_cleanup_result = WaitEventExtensionNew("PostgresFdwCleanupResult"); + /* first time, allocate or get the custom wait event */ + if (pgfdw_we_cleanup_result == 0) + pgfdw_we_cleanup_result = WaitEventExtensionNew("PostgresFdwCleanupResult"); - /* Sleep until there's something to do */ - wc = WaitLatchOrSocket(MyLatch, - WL_LATCH_SET | WL_SOCKET_READABLE | - WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - PQsocket(conn), - cur_timeout, pgfdw_we_cleanup_result); - ResetLatch(MyLatch); + /* Sleep until there's something to do */ + wc = WaitLatchOrSocket(MyLatch, + WL_LATCH_SET | WL_SOCKET_READABLE | + WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + PQsocket(conn), + cur_timeout, pgfdw_we_cleanup_result); + ResetLatch(MyLatch); - CHECK_FOR_INTERRUPTS(); + CHECK_FOR_INTERRUPTS(); - /* Data available in socket? */ - if (wc & WL_SOCKET_READABLE) + /* Data available in socket? */ + if (wc & WL_SOCKET_READABLE) + { + if (!PQconsumeInput(conn)) { - if (!PQconsumeInput(conn)) - { - /* connection trouble */ - failed = true; - goto exit; - } + /* connection trouble */ + failed = true; + goto exit; } } + } - res = PQgetResult(conn); - if (res == NULL) - break; /* query is complete */ + res = PQgetResult(conn); + if (res == NULL) + break; /* query is complete */ - PQclear(last_res); - last_res = res; - } + PQclear(last_res); + last_res = res; + } exit: if (failed) PQclear(last_res); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index f2dee7b1c69ab..25b287be069fa 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3605,31 +3605,31 @@ get_remote_estimate(const char *sql, PGconn *conn, Cost *startup_cost, Cost *total_cost) { PGresult *res; - char *line; - char *p; - int n; + char *line; + char *p; + int n; - /* - * Execute EXPLAIN remotely. 
- */ - res = pgfdw_exec_query(conn, sql, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql); + /* + * Execute EXPLAIN remotely. + */ + res = pgfdw_exec_query(conn, sql, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql); - /* - * Extract cost numbers for topmost plan node. Note we search for a - * left paren from the end of the line to avoid being confused by - * other uses of parentheses. - */ - line = PQgetvalue(res, 0, 0); - p = strrchr(line, '('); - if (p == NULL) - elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); - n = sscanf(p, "(cost=%lf..%lf rows=%lf width=%d)", - startup_cost, total_cost, rows, width); - if (n != 4) - elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); - PQclear(res); + /* + * Extract cost numbers for topmost plan node. Note we search for a left + * paren from the end of the line to avoid being confused by other uses of + * parentheses. + */ + line = PQgetvalue(res, 0, 0); + p = strrchr(line, '('); + if (p == NULL) + elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); + n = sscanf(p, "(cost=%lf..%lf rows=%lf width=%d)", + startup_cost, total_cost, rows, width); + if (n != 4) + elog(ERROR, "could not interpret EXPLAIN output: \"%s\"", line); + PQclear(res); } /* @@ -3812,63 +3812,63 @@ fetch_more_data(ForeignScanState *node) MemoryContextReset(fsstate->batch_cxt); oldcontext = MemoryContextSwitchTo(fsstate->batch_cxt); - if (fsstate->async_capable) - { - Assert(fsstate->conn_state->pendingAreq); + if (fsstate->async_capable) + { + Assert(fsstate->conn_state->pendingAreq); - /* - * The query was already sent by an earlier call to - * fetch_more_data_begin. So now we just fetch the result. - */ - res = pgfdw_get_result(conn); - /* On error, report the original query, not the FETCH. */ - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + /* + * The query was already sent by an earlier call to + * fetch_more_data_begin. So now we just fetch the result. + */ + res = pgfdw_get_result(conn); + /* On error, report the original query, not the FETCH. */ + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, fsstate->query); - /* Reset per-connection state */ - fsstate->conn_state->pendingAreq = NULL; - } - else - { - char sql[64]; + /* Reset per-connection state */ + fsstate->conn_state->pendingAreq = NULL; + } + else + { + char sql[64]; - /* This is a regular synchronous fetch. */ - snprintf(sql, sizeof(sql), "FETCH %d FROM c%u", - fsstate->fetch_size, fsstate->cursor_number); + /* This is a regular synchronous fetch. */ + snprintf(sql, sizeof(sql), "FETCH %d FROM c%u", + fsstate->fetch_size, fsstate->cursor_number); - res = pgfdw_exec_query(conn, sql, fsstate->conn_state); - /* On error, report the original query, not the FETCH. */ - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); - } + res = pgfdw_exec_query(conn, sql, fsstate->conn_state); + /* On error, report the original query, not the FETCH. 
*/ + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, fsstate->query); + } - /* Convert the data into HeapTuples */ - numrows = PQntuples(res); - fsstate->tuples = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple)); - fsstate->num_tuples = numrows; - fsstate->next_tuple = 0; + /* Convert the data into HeapTuples */ + numrows = PQntuples(res); + fsstate->tuples = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple)); + fsstate->num_tuples = numrows; + fsstate->next_tuple = 0; - for (i = 0; i < numrows; i++) - { - Assert(IsA(node->ss.ps.plan, ForeignScan)); - - fsstate->tuples[i] = - make_tuple_from_result_row(res, i, - fsstate->rel, - fsstate->attinmeta, - fsstate->retrieved_attrs, - node, - fsstate->temp_cxt); - } + for (i = 0; i < numrows; i++) + { + Assert(IsA(node->ss.ps.plan, ForeignScan)); + + fsstate->tuples[i] = + make_tuple_from_result_row(res, i, + fsstate->rel, + fsstate->attinmeta, + fsstate->retrieved_attrs, + node, + fsstate->temp_cxt); + } - /* Update fetch_ct_2 */ - if (fsstate->fetch_ct_2 < 2) - fsstate->fetch_ct_2++; + /* Update fetch_ct_2 */ + if (fsstate->fetch_ct_2 < 2) + fsstate->fetch_ct_2++; - /* Must be EOF if we didn't get as many tuples as we asked for. */ - fsstate->eof_reached = (numrows < fsstate->fetch_size); + /* Must be EOF if we didn't get as many tuples as we asked for. */ + fsstate->eof_reached = (numrows < fsstate->fetch_size); - PQclear(res); + PQclear(res); MemoryContextSwitchTo(oldcontext); } @@ -4322,20 +4322,20 @@ static void store_returning_result(PgFdwModifyState *fmstate, TupleTableSlot *slot, PGresult *res) { - HeapTuple newtup; + HeapTuple newtup; - newtup = make_tuple_from_result_row(res, 0, - fmstate->rel, - fmstate->attinmeta, - fmstate->retrieved_attrs, - NULL, - fmstate->temp_cxt); + newtup = make_tuple_from_result_row(res, 0, + fmstate->rel, + fmstate->attinmeta, + fmstate->retrieved_attrs, + NULL, + fmstate->temp_cxt); - /* - * The returning slot will not necessarily be suitable to store - * heaptuples directly, so allow for conversion. - */ - ExecForceStoreHeapTuple(newtup, slot, true); + /* + * The returning slot will not necessarily be suitable to store heaptuples + * directly, so allow for conversion. 
+ */ + ExecForceStoreHeapTuple(newtup, slot, true); } /* @@ -4921,14 +4921,14 @@ postgresAnalyzeForeignTable(Relation relation, initStringInfo(&sql); deparseAnalyzeSizeSql(&sql, relation); - res = pgfdw_exec_query(conn, sql.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + res = pgfdw_exec_query(conn, sql.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); - if (PQntuples(res) != 1 || PQnfields(res) != 1) - elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); - *totalpages = strtoul(PQgetvalue(res, 0, 0), NULL, 10); - PQclear(res); + if (PQntuples(res) != 1 || PQnfields(res) != 1) + elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); + *totalpages = strtoul(PQgetvalue(res, 0, 0), NULL, 10); + PQclear(res); ReleaseConnection(conn); @@ -4970,15 +4970,15 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) initStringInfo(&sql); deparseAnalyzeInfoSql(&sql, relation); - res = pgfdw_exec_query(conn, sql.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + res = pgfdw_exec_query(conn, sql.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); - if (PQntuples(res) != 1 || PQnfields(res) != 2) - elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); - reltuples = strtod(PQgetvalue(res, 0, 0), NULL); - relkind = *(PQgetvalue(res, 0, 1)); - PQclear(res); + if (PQntuples(res) != 1 || PQnfields(res) != 2) + elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); + reltuples = strtod(PQgetvalue(res, 0, 0), NULL); + relkind = *(PQgetvalue(res, 0, 1)); + PQclear(res); ReleaseConnection(conn); @@ -5200,76 +5200,76 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, deparseAnalyzeSql(&sql, relation, method, sample_frac, &astate.retrieved_attrs); - res = pgfdw_exec_query(conn, sql.data, NULL); - if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); - PQclear(res); + res = pgfdw_exec_query(conn, sql.data, NULL); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); + PQclear(res); - /* - * Determine the fetch size. The default is arbitrary, but shouldn't - * be enormous. - */ - fetch_size = 100; - foreach(lc, server->options) - { - DefElem *def = (DefElem *) lfirst(lc); + /* + * Determine the fetch size. The default is arbitrary, but shouldn't be + * enormous. + */ + fetch_size = 100; + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); - if (strcmp(def->defname, "fetch_size") == 0) - { - (void) parse_int(defGetString(def), &fetch_size, 0, NULL); - break; - } - } - foreach(lc, table->options) + if (strcmp(def->defname, "fetch_size") == 0) { - DefElem *def = (DefElem *) lfirst(lc); + (void) parse_int(defGetString(def), &fetch_size, 0, NULL); + break; + } + } + foreach(lc, table->options) + { + DefElem *def = (DefElem *) lfirst(lc); - if (strcmp(def->defname, "fetch_size") == 0) - { - (void) parse_int(defGetString(def), &fetch_size, 0, NULL); - break; - } + if (strcmp(def->defname, "fetch_size") == 0) + { + (void) parse_int(defGetString(def), &fetch_size, 0, NULL); + break; } + } - /* Construct command to fetch rows from remote. */ - snprintf(fetch_sql, sizeof(fetch_sql), "FETCH %d FROM c%u", - fetch_size, cursor_number); + /* Construct command to fetch rows from remote. 
*/ + snprintf(fetch_sql, sizeof(fetch_sql), "FETCH %d FROM c%u", + fetch_size, cursor_number); - /* Retrieve and process rows a batch at a time. */ - for (;;) - { - int numrows; - int i; + /* Retrieve and process rows a batch at a time. */ + for (;;) + { + int numrows; + int i; - /* Allow users to cancel long query */ - CHECK_FOR_INTERRUPTS(); + /* Allow users to cancel long query */ + CHECK_FOR_INTERRUPTS(); - /* - * XXX possible future improvement: if rowstoskip is large, we - * could issue a MOVE rather than physically fetching the rows, - * then just adjust rowstoskip and samplerows appropriately. - */ + /* + * XXX possible future improvement: if rowstoskip is large, we could + * issue a MOVE rather than physically fetching the rows, then just + * adjust rowstoskip and samplerows appropriately. + */ - /* Fetch some rows */ - res = pgfdw_exec_query(conn, fetch_sql, NULL); - /* On error, report the original query, not the FETCH. */ - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + /* Fetch some rows */ + res = pgfdw_exec_query(conn, fetch_sql, NULL); + /* On error, report the original query, not the FETCH. */ + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, sql.data); - /* Process whatever we got. */ - numrows = PQntuples(res); - for (i = 0; i < numrows; i++) - analyze_row_processor(res, i, &astate); + /* Process whatever we got. */ + numrows = PQntuples(res); + for (i = 0; i < numrows; i++) + analyze_row_processor(res, i, &astate); - PQclear(res); + PQclear(res); - /* Must be EOF if we didn't get all the rows requested. */ - if (numrows < fetch_size) - break; - } + /* Must be EOF if we didn't get all the rows requested. */ + if (numrows < fetch_size) + break; + } - /* Close the cursor, just to be tidy. */ - close_cursor(conn, cursor_number, NULL); + /* Close the cursor, just to be tidy. */ + close_cursor(conn, cursor_number, NULL); ReleaseConnection(conn); @@ -5420,234 +5420,231 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Create workspace for strings */ initStringInfo(&buf); - /* Check that the schema really exists */ - appendStringInfoString(&buf, "SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = "); - deparseStringLiteral(&buf, stmt->remote_schema); + /* Check that the schema really exists */ + appendStringInfoString(&buf, "SELECT 1 FROM pg_catalog.pg_namespace WHERE nspname = "); + deparseStringLiteral(&buf, stmt->remote_schema); - res = pgfdw_exec_query(conn, buf.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + res = pgfdw_exec_query(conn, buf.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, buf.data); - if (PQntuples(res) != 1) - ereport(ERROR, - (errcode(ERRCODE_FDW_SCHEMA_NOT_FOUND), - errmsg("schema \"%s\" is not present on foreign server \"%s\"", - stmt->remote_schema, server->servername))); + if (PQntuples(res) != 1) + ereport(ERROR, + (errcode(ERRCODE_FDW_SCHEMA_NOT_FOUND), + errmsg("schema \"%s\" is not present on foreign server \"%s\"", + stmt->remote_schema, server->servername))); - PQclear(res); - resetStringInfo(&buf); + PQclear(res); + resetStringInfo(&buf); - /* - * Fetch all table data from this schema, possibly restricted by - * EXCEPT or LIMIT TO. (We don't actually need to pay any attention - * to EXCEPT/LIMIT TO here, because the core code will filter the - * statements we return according to those lists anyway. 
But it - * should save a few cycles to not process excluded tables in the - * first place.) - * - * Import table data for partitions only when they are explicitly - * specified in LIMIT TO clause. Otherwise ignore them and only - * include the definitions of the root partitioned tables to allow - * access to the complete remote data set locally in the schema - * imported. - * - * Note: because we run the connection with search_path restricted to - * pg_catalog, the format_type() and pg_get_expr() outputs will always - * include a schema name for types/functions in other schemas, which - * is what we want. - */ + /* + * Fetch all table data from this schema, possibly restricted by EXCEPT or + * LIMIT TO. (We don't actually need to pay any attention to EXCEPT/LIMIT + * TO here, because the core code will filter the statements we return + * according to those lists anyway. But it should save a few cycles to + * not process excluded tables in the first place.) + * + * Import table data for partitions only when they are explicitly + * specified in LIMIT TO clause. Otherwise ignore them and only include + * the definitions of the root partitioned tables to allow access to the + * complete remote data set locally in the schema imported. + * + * Note: because we run the connection with search_path restricted to + * pg_catalog, the format_type() and pg_get_expr() outputs will always + * include a schema name for types/functions in other schemas, which is + * what we want. + */ + appendStringInfoString(&buf, + "SELECT relname, " + " attname, " + " format_type(atttypid, atttypmod), " + " attnotnull, " + " pg_get_expr(adbin, adrelid), "); + + /* Generated columns are supported since Postgres 12 */ + if (PQserverVersion(conn) >= 120000) appendStringInfoString(&buf, - "SELECT relname, " - " attname, " - " format_type(atttypid, atttypmod), " - " attnotnull, " - " pg_get_expr(adbin, adrelid), "); - - /* Generated columns are supported since Postgres 12 */ - if (PQserverVersion(conn) >= 120000) - appendStringInfoString(&buf, - " attgenerated, "); - else - appendStringInfoString(&buf, - " NULL, "); - - if (import_collate) - appendStringInfoString(&buf, - " collname, " - " collnsp.nspname "); - else - appendStringInfoString(&buf, - " NULL, NULL "); - + " attgenerated, "); + else appendStringInfoString(&buf, - "FROM pg_class c " - " JOIN pg_namespace n ON " - " relnamespace = n.oid " - " LEFT JOIN pg_attribute a ON " - " attrelid = c.oid AND attnum > 0 " - " AND NOT attisdropped " - " LEFT JOIN pg_attrdef ad ON " - " adrelid = c.oid AND adnum = attnum "); - - if (import_collate) - appendStringInfoString(&buf, - " LEFT JOIN pg_collation coll ON " - " coll.oid = attcollation " - " LEFT JOIN pg_namespace collnsp ON " - " collnsp.oid = collnamespace "); + " NULL, "); + if (import_collate) appendStringInfoString(&buf, - "WHERE c.relkind IN (" - CppAsString2(RELKIND_RELATION) "," - CppAsString2(RELKIND_VIEW) "," - CppAsString2(RELKIND_FOREIGN_TABLE) "," - CppAsString2(RELKIND_MATVIEW) "," - CppAsString2(RELKIND_PARTITIONED_TABLE) ") " - " AND n.nspname = "); - deparseStringLiteral(&buf, stmt->remote_schema); + " collname, " + " collnsp.nspname "); + else + appendStringInfoString(&buf, + " NULL, NULL "); + + appendStringInfoString(&buf, + "FROM pg_class c " + " JOIN pg_namespace n ON " + " relnamespace = n.oid " + " LEFT JOIN pg_attribute a ON " + " attrelid = c.oid AND attnum > 0 " + " AND NOT attisdropped " + " LEFT JOIN pg_attrdef ad ON " + " adrelid = c.oid AND adnum = attnum "); + + if (import_collate) + 
appendStringInfoString(&buf, + " LEFT JOIN pg_collation coll ON " + " coll.oid = attcollation " + " LEFT JOIN pg_namespace collnsp ON " + " collnsp.oid = collnamespace "); + + appendStringInfoString(&buf, + "WHERE c.relkind IN (" + CppAsString2(RELKIND_RELATION) "," + CppAsString2(RELKIND_VIEW) "," + CppAsString2(RELKIND_FOREIGN_TABLE) "," + CppAsString2(RELKIND_MATVIEW) "," + CppAsString2(RELKIND_PARTITIONED_TABLE) ") " + " AND n.nspname = "); + deparseStringLiteral(&buf, stmt->remote_schema); + + /* Partitions are supported since Postgres 10 */ + if (PQserverVersion(conn) >= 100000 && + stmt->list_type != FDW_IMPORT_SCHEMA_LIMIT_TO) + appendStringInfoString(&buf, " AND NOT c.relispartition "); + + /* Apply restrictions for LIMIT TO and EXCEPT */ + if (stmt->list_type == FDW_IMPORT_SCHEMA_LIMIT_TO || + stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) + { + bool first_item = true; - /* Partitions are supported since Postgres 10 */ - if (PQserverVersion(conn) >= 100000 && - stmt->list_type != FDW_IMPORT_SCHEMA_LIMIT_TO) - appendStringInfoString(&buf, " AND NOT c.relispartition "); + appendStringInfoString(&buf, " AND c.relname "); + if (stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) + appendStringInfoString(&buf, "NOT "); + appendStringInfoString(&buf, "IN ("); - /* Apply restrictions for LIMIT TO and EXCEPT */ - if (stmt->list_type == FDW_IMPORT_SCHEMA_LIMIT_TO || - stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) + /* Append list of table names within IN clause */ + foreach(lc, stmt->table_list) { - bool first_item = true; + RangeVar *rv = (RangeVar *) lfirst(lc); - appendStringInfoString(&buf, " AND c.relname "); - if (stmt->list_type == FDW_IMPORT_SCHEMA_EXCEPT) - appendStringInfoString(&buf, "NOT "); - appendStringInfoString(&buf, "IN ("); + if (first_item) + first_item = false; + else + appendStringInfoString(&buf, ", "); + deparseStringLiteral(&buf, rv->relname); + } + appendStringInfoChar(&buf, ')'); + } - /* Append list of table names within IN clause */ - foreach(lc, stmt->table_list) - { - RangeVar *rv = (RangeVar *) lfirst(lc); + /* Append ORDER BY at the end of query to ensure output ordering */ + appendStringInfoString(&buf, " ORDER BY c.relname, a.attnum"); - if (first_item) - first_item = false; - else - appendStringInfoString(&buf, ", "); - deparseStringLiteral(&buf, rv->relname); - } - appendStringInfoChar(&buf, ')'); - } + /* Fetch the data */ + res = pgfdw_exec_query(conn, buf.data, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(ERROR, res, conn, buf.data); - /* Append ORDER BY at the end of query to ensure output ordering */ - appendStringInfoString(&buf, " ORDER BY c.relname, a.attnum"); + /* Process results */ + numrows = PQntuples(res); + /* note: incrementation of i happens in inner loop's while() test */ + for (i = 0; i < numrows;) + { + char *tablename = PQgetvalue(res, i, 0); + bool first_item = true; - /* Fetch the data */ - res = pgfdw_exec_query(conn, buf.data, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + resetStringInfo(&buf); + appendStringInfo(&buf, "CREATE FOREIGN TABLE %s (\n", + quote_identifier(tablename)); - /* Process results */ - numrows = PQntuples(res); - /* note: incrementation of i happens in inner loop's while() test */ - for (i = 0; i < numrows;) + /* Scan all rows for this table */ + do { - char *tablename = PQgetvalue(res, i, 0); - bool first_item = true; + char *attname; + char *typename; + char *attnotnull; + char *attgenerated; + char *attdefault; + char 
*collname; + char *collnamespace; + + /* If table has no columns, we'll see nulls here */ + if (PQgetisnull(res, i, 1)) + continue; - resetStringInfo(&buf); - appendStringInfo(&buf, "CREATE FOREIGN TABLE %s (\n", - quote_identifier(tablename)); + attname = PQgetvalue(res, i, 1); + typename = PQgetvalue(res, i, 2); + attnotnull = PQgetvalue(res, i, 3); + attdefault = PQgetisnull(res, i, 4) ? NULL : + PQgetvalue(res, i, 4); + attgenerated = PQgetisnull(res, i, 5) ? NULL : + PQgetvalue(res, i, 5); + collname = PQgetisnull(res, i, 6) ? NULL : + PQgetvalue(res, i, 6); + collnamespace = PQgetisnull(res, i, 7) ? NULL : + PQgetvalue(res, i, 7); + + if (first_item) + first_item = false; + else + appendStringInfoString(&buf, ",\n"); - /* Scan all rows for this table */ - do - { - char *attname; - char *typename; - char *attnotnull; - char *attgenerated; - char *attdefault; - char *collname; - char *collnamespace; - - /* If table has no columns, we'll see nulls here */ - if (PQgetisnull(res, i, 1)) - continue; + /* Print column name and type */ + appendStringInfo(&buf, " %s %s", + quote_identifier(attname), + typename); - attname = PQgetvalue(res, i, 1); - typename = PQgetvalue(res, i, 2); - attnotnull = PQgetvalue(res, i, 3); - attdefault = PQgetisnull(res, i, 4) ? NULL : - PQgetvalue(res, i, 4); - attgenerated = PQgetisnull(res, i, 5) ? NULL : - PQgetvalue(res, i, 5); - collname = PQgetisnull(res, i, 6) ? NULL : - PQgetvalue(res, i, 6); - collnamespace = PQgetisnull(res, i, 7) ? NULL : - PQgetvalue(res, i, 7); - - if (first_item) - first_item = false; - else - appendStringInfoString(&buf, ",\n"); + /* + * Add column_name option so that renaming the foreign table's + * column doesn't break the association to the underlying column. + */ + appendStringInfoString(&buf, " OPTIONS (column_name "); + deparseStringLiteral(&buf, attname); + appendStringInfoChar(&buf, ')'); - /* Print column name and type */ - appendStringInfo(&buf, " %s %s", - quote_identifier(attname), - typename); + /* Add COLLATE if needed */ + if (import_collate && collname != NULL && collnamespace != NULL) + appendStringInfo(&buf, " COLLATE %s.%s", + quote_identifier(collnamespace), + quote_identifier(collname)); - /* - * Add column_name option so that renaming the foreign table's - * column doesn't break the association to the underlying - * column. 
- */ - appendStringInfoString(&buf, " OPTIONS (column_name "); - deparseStringLiteral(&buf, attname); - appendStringInfoChar(&buf, ')'); - - /* Add COLLATE if needed */ - if (import_collate && collname != NULL && collnamespace != NULL) - appendStringInfo(&buf, " COLLATE %s.%s", - quote_identifier(collnamespace), - quote_identifier(collname)); - - /* Add DEFAULT if needed */ - if (import_default && attdefault != NULL && - (!attgenerated || !attgenerated[0])) - appendStringInfo(&buf, " DEFAULT %s", attdefault); - - /* Add GENERATED if needed */ - if (import_generated && attgenerated != NULL && - attgenerated[0] == ATTRIBUTE_GENERATED_STORED) - { - Assert(attdefault != NULL); - appendStringInfo(&buf, - " GENERATED ALWAYS AS (%s) STORED", - attdefault); - } + /* Add DEFAULT if needed */ + if (import_default && attdefault != NULL && + (!attgenerated || !attgenerated[0])) + appendStringInfo(&buf, " DEFAULT %s", attdefault); - /* Add NOT NULL if needed */ - if (import_not_null && attnotnull[0] == 't') - appendStringInfoString(&buf, " NOT NULL"); + /* Add GENERATED if needed */ + if (import_generated && attgenerated != NULL && + attgenerated[0] == ATTRIBUTE_GENERATED_STORED) + { + Assert(attdefault != NULL); + appendStringInfo(&buf, + " GENERATED ALWAYS AS (%s) STORED", + attdefault); } - while (++i < numrows && - strcmp(PQgetvalue(res, i, 0), tablename) == 0); - /* - * Add server name and table-level options. We specify remote - * schema and table name as options (the latter to ensure that - * renaming the foreign table doesn't break the association). - */ - appendStringInfo(&buf, "\n) SERVER %s\nOPTIONS (", - quote_identifier(server->servername)); + /* Add NOT NULL if needed */ + if (import_not_null && attnotnull[0] == 't') + appendStringInfoString(&buf, " NOT NULL"); + } + while (++i < numrows && + strcmp(PQgetvalue(res, i, 0), tablename) == 0); - appendStringInfoString(&buf, "schema_name "); - deparseStringLiteral(&buf, stmt->remote_schema); - appendStringInfoString(&buf, ", table_name "); - deparseStringLiteral(&buf, tablename); + /* + * Add server name and table-level options. We specify remote schema + * and table name as options (the latter to ensure that renaming the + * foreign table doesn't break the association). + */ + appendStringInfo(&buf, "\n) SERVER %s\nOPTIONS (", + quote_identifier(server->servername)); - appendStringInfoString(&buf, ");"); + appendStringInfoString(&buf, "schema_name "); + deparseStringLiteral(&buf, stmt->remote_schema); + appendStringInfoString(&buf, ", table_name "); + deparseStringLiteral(&buf, tablename); - commands = lappend(commands, pstrdup(buf.data)); - } - PQclear(res); + appendStringInfoString(&buf, ");"); + + commands = lappend(commands, pstrdup(buf.data)); + } + PQclear(res); ReleaseConnection(conn); diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index 4ba635aa96f92..1c4a342090c33 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -281,28 +281,28 @@ libpqsrv_get_result_last(PGconn *conn, uint32 wait_event_info) { PGresult *lastResult = NULL; - for (;;) - { - /* Wait for, and collect, the next PGresult. */ - PGresult *result; + for (;;) + { + /* Wait for, and collect, the next PGresult. 
*/ + PGresult *result; - result = libpqsrv_get_result(conn, wait_event_info); - if (result == NULL) - break; /* query is complete, or failure */ + result = libpqsrv_get_result(conn, wait_event_info); + if (result == NULL) + break; /* query is complete, or failure */ - /* - * Emulate PQexec()'s behavior of returning the last result when - * there are many. - */ - PQclear(lastResult); - lastResult = result; + /* + * Emulate PQexec()'s behavior of returning the last result when there + * are many. + */ + PQclear(lastResult); + lastResult = result; - if (PQresultStatus(lastResult) == PGRES_COPY_IN || - PQresultStatus(lastResult) == PGRES_COPY_OUT || - PQresultStatus(lastResult) == PGRES_COPY_BOTH || - PQstatus(conn) == CONNECTION_BAD) - break; - } + if (PQresultStatus(lastResult) == PGRES_COPY_IN || + PQresultStatus(lastResult) == PGRES_COPY_OUT || + PQresultStatus(lastResult) == PGRES_COPY_BOTH || + PQstatus(conn) == CONNECTION_BAD) + break; + } return lastResult; } From 0f9d4d7c12dcebe951061763ca23ee3b6477e7ca Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:37:29 -0400 Subject: [PATCH 17/67] Silence leakage complaint about postgres_fdw's InitPgFdwOptions. Valgrind complains that the PQconninfoOption array returned by libpq is leaked. We apparently believed that we could suppress that warning by storing that array's address in a static variable. However, modern C compilers are bright enough to optimize the static variable away. We could escalate that arms race by making the variable global. But on the whole it seems better to revise the code so that it can free libpq's result properly. The only thing that costs us is copying the parameter-name keywords; which seems like a pretty negligible cost in a function that runs at most once per process. Author: Tom Lane Reviewed-by: Matheus Alcantara Discussion: https://postgr.es/m/2976982.1748049023@sss.pgh.pa.us --- contrib/postgres_fdw/option.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index c2f936640bca8..d6fa89bad9399 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -21,6 +21,7 @@ #include "libpq/libpq-be.h" #include "postgres_fdw.h" #include "utils/guc.h" +#include "utils/memutils.h" #include "utils/varlena.h" /* @@ -39,12 +40,6 @@ typedef struct PgFdwOption */ static PgFdwOption *postgres_fdw_options; -/* - * Valid options for libpq. - * Allocated and filled in InitPgFdwOptions. - */ -static PQconninfoOption *libpq_options; - /* * GUC parameters */ @@ -239,6 +234,7 @@ static void InitPgFdwOptions(void) { int num_libpq_opts; + PQconninfoOption *libpq_options; PQconninfoOption *lopt; PgFdwOption *popt; @@ -307,8 +303,8 @@ InitPgFdwOptions(void) * Get list of valid libpq options. * * To avoid unnecessary work, we get the list once and use it throughout - * the lifetime of this backend process. We don't need to care about - * memory context issues, because PQconndefaults allocates with malloc. + * the lifetime of this backend process. Hence, we'll allocate it in + * TopMemoryContext. */ libpq_options = PQconndefaults(); if (!libpq_options) /* assume reason for failure is OOM */ @@ -325,19 +321,11 @@ InitPgFdwOptions(void) /* * Construct an array which consists of all valid options for * postgres_fdw, by appending FDW-specific options to libpq options. - * - * We use plain malloc here to allocate postgres_fdw_options because it - * lives as long as the backend process does. 
Besides, keeping - * libpq_options in memory allows us to avoid copying every keyword - * string. */ postgres_fdw_options = (PgFdwOption *) - malloc(sizeof(PgFdwOption) * num_libpq_opts + - sizeof(non_libpq_options)); - if (postgres_fdw_options == NULL) - ereport(ERROR, - (errcode(ERRCODE_FDW_OUT_OF_MEMORY), - errmsg("out of memory"))); + MemoryContextAlloc(TopMemoryContext, + sizeof(PgFdwOption) * num_libpq_opts + + sizeof(non_libpq_options)); popt = postgres_fdw_options; for (lopt = libpq_options; lopt->keyword; lopt++) @@ -355,8 +343,8 @@ InitPgFdwOptions(void) if (strncmp(lopt->keyword, "oauth_", strlen("oauth_")) == 0) continue; - /* We don't have to copy keyword string, as described above. */ - popt->keyword = lopt->keyword; + popt->keyword = MemoryContextStrdup(TopMemoryContext, + lopt->keyword); /* * "user" and any secret options are allowed only on user mappings. @@ -371,6 +359,9 @@ InitPgFdwOptions(void) popt++; } + /* Done with libpq's output structure. */ + PQconninfoFree(libpq_options); + /* Append FDW-specific options and dummy terminator. */ memcpy(popt, non_libpq_options, sizeof(non_libpq_options)); } From db6461b1c9aae122b90bb52430f06efb306b371a Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 25 Jul 2025 16:45:57 -0400 Subject: [PATCH 18/67] Add commit 73873805f to .git-blame-ignore-revs. --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 1ee1dee011164..f8526d4d1a9c2 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -14,6 +14,9 @@ # # $ git log --pretty=format:"%H # %cd%n# %s" $PGINDENTGITHASH -1 --date=iso +73873805fb3627cb23937c750fa83ffd8f16fc6c # 2025-07-25 16:36:44 -0400 +# Run pgindent on the changes of the previous patch. + 9e345415bcd3c4358350b89edfd710469b8bfaf9 # 2025-07-01 15:23:07 +0200 # Fix indentation in pg_numa code From 6f22a82a401d267e4bf1fcbcff8d6adb24e14d58 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sun, 27 Jul 2025 17:48:47 +0900 Subject: [PATCH 19/67] Add assertions for all the required index AM callbacks Similar checks are done for the mandatory table AM callbacks. A portion of the index AM callbacks are optional and can be NULL; the rest is mandatory and is documented as such in the documentation and in amapi.h. These checks are useful to detect quickly if all the mandatory callbacks are defined when implementing a new index access method, as the assertions are run when loading the AM. Author: Japin Li Discussion: https://postgr.es/m/ME0P300MB0445795D31CEAB92C58B41FDB651A@ME0P300MB0445.AUSP300.PROD.OUTLOOK.COM --- src/backend/access/index/amapi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c index f0f4f974bcedb..60684c5342279 100644 --- a/src/backend/access/index/amapi.c +++ b/src/backend/access/index/amapi.c @@ -42,6 +42,19 @@ GetIndexAmRoutine(Oid amhandler) elog(ERROR, "index access method handler function %u did not return an IndexAmRoutine struct", amhandler); + /* Assert that all required callbacks are present. 
 */
+	Assert(routine->ambuild != NULL);
+	Assert(routine->ambuildempty != NULL);
+	Assert(routine->aminsert != NULL);
+	Assert(routine->ambulkdelete != NULL);
+	Assert(routine->amvacuumcleanup != NULL);
+	Assert(routine->amcostestimate != NULL);
+	Assert(routine->amoptions != NULL);
+	Assert(routine->amvalidate != NULL);
+	Assert(routine->ambeginscan != NULL);
+	Assert(routine->amrescan != NULL);
+	Assert(routine->amendscan != NULL);
+
 	return routine;
 }

From 258bf0a2ea8ff86257f750018bfd44397ce7e554 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov
Date: Sun, 27 Jul 2025 15:07:47 +0300
Subject: [PATCH 20/67] Process sync requests incrementally in
 AbsorbSyncRequests

If the number of sync requests is big enough, the palloc() call in
AbsorbSyncRequests() will attempt to allocate more than 1 GB of memory,
resulting in failure. This can lead to an infinite loop in the
checkpointer process, as it repeatedly fails to absorb the pending
requests.

This commit introduces the following changes to cope with this problem:

1. Turn pending checkpointer requests array in shared memory into a
   bounded ring buffer.
2. Limit maximum ring buffer size to 10M items.
3. Make AbsorbSyncRequests() process requests incrementally in 10K
   batches.

Even #2 makes the whole queue size fit the maximum palloc() size of
1 GB, while #3 spares the checkpointer long periods of continuous lock
holding.

This commit is for master only. A simpler fix, which just limits the
request queue size to 10M, will be backpatched.

Reported-by: Ekaterina Sokolova
Discussion: https://postgr.es/m/db4534f83a22a29ab5ee2566ad86ca92%40postgrespro.ru
Author: Maxim Orlov
Co-authored-by: Xuneng Zhou
Reviewed-by: Andres Freund
Reviewed-by: Heikki Linnakangas
Reviewed-by: Alexander Korotkov
---
 src/backend/postmaster/checkpointer.c | 155 +++++++++++++++++++-------
 1 file changed, 114 insertions(+), 41 deletions(-)

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 2809e298a44fb..8490148a47d52 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -130,6 +130,13 @@ typedef struct

 	int			num_requests;	/* current # of requests */
 	int			max_requests;	/* allocated array size */
+
+	int			head;			/* Index of the first request in the ring
+								 * buffer */
+	int			tail;			/* Index of the last request in the ring
+								 * buffer */
+
+	/* The ring buffer of pending checkpointer requests */
 	CheckpointerRequest requests[FLEXIBLE_ARRAY_MEMBER];
 } CheckpointerShmemStruct;

@@ -138,6 +145,12 @@ static CheckpointerShmemStruct *CheckpointerShmem;
 /* interval for calling AbsorbSyncRequests in CheckpointWriteDelay */
 #define WRITES_PER_ABSORB		1000

+/* Maximum number of checkpointer requests to process in one batch */
+#define CKPT_REQ_BATCH_SIZE 10000
+
+/* Max number of requests the checkpointer request queue can hold */
+#define MAX_CHECKPOINT_REQUESTS 10000000
+
 /*
  * GUC parameters
  */
@@ -973,7 +986,8 @@ CheckpointerShmemInit(void)
 	 */
 	MemSet(CheckpointerShmem, 0, size);
 	SpinLockInit(&CheckpointerShmem->ckpt_lck);
-	CheckpointerShmem->max_requests = NBuffers;
+	CheckpointerShmem->max_requests = Min(NBuffers, MAX_CHECKPOINT_REQUESTS);
+	CheckpointerShmem->head = CheckpointerShmem->tail = 0;
 	ConditionVariableInit(&CheckpointerShmem->start_cv);
 	ConditionVariableInit(&CheckpointerShmem->done_cv);
 }
@@ -1201,6 +1215,7 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type)
 {
 	CheckpointerRequest *request;
 	bool		too_full;
+	int			insert_pos;

 	if (!IsUnderPostmaster)
 		return false;			/* probably shouldn't even get here */
@@ -1224,10 +1239,14 @@ 
ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) } /* OK, insert request */ - request = &CheckpointerShmem->requests[CheckpointerShmem->num_requests++]; + insert_pos = CheckpointerShmem->tail; + request = &CheckpointerShmem->requests[insert_pos]; request->ftag = *ftag; request->type = type; + CheckpointerShmem->tail = (CheckpointerShmem->tail + 1) % CheckpointerShmem->max_requests; + CheckpointerShmem->num_requests++; + /* If queue is more than half full, nudge the checkpointer to empty it */ too_full = (CheckpointerShmem->num_requests >= CheckpointerShmem->max_requests / 2); @@ -1269,12 +1288,16 @@ CompactCheckpointerRequestQueue(void) struct CheckpointerSlotMapping { CheckpointerRequest request; - int slot; + int ring_idx; }; - int n, - preserve_count; + int n; int num_skipped = 0; + int head; + int max_requests; + int num_requests; + int read_idx, + write_idx; HASHCTL ctl; HTAB *htab; bool *skip_slot; @@ -1286,8 +1309,13 @@ CompactCheckpointerRequestQueue(void) if (CritSectionCount > 0) return false; + max_requests = CheckpointerShmem->max_requests; + num_requests = CheckpointerShmem->num_requests; + /* Initialize skip_slot array */ - skip_slot = palloc0(sizeof(bool) * CheckpointerShmem->num_requests); + skip_slot = palloc0(sizeof(bool) * max_requests); + + head = CheckpointerShmem->head; /* Initialize temporary hash table */ ctl.keysize = sizeof(CheckpointerRequest); @@ -1311,7 +1339,8 @@ CompactCheckpointerRequestQueue(void) * away preceding entries that would end up being canceled anyhow), but * it's not clear that the extra complexity would buy us anything. */ - for (n = 0; n < CheckpointerShmem->num_requests; n++) + read_idx = head; + for (n = 0; n < num_requests; n++) { CheckpointerRequest *request; struct CheckpointerSlotMapping *slotmap; @@ -1324,16 +1353,19 @@ CompactCheckpointerRequestQueue(void) * CheckpointerShmemInit. Note also that RelFileLocator had better * contain no pad bytes. */ - request = &CheckpointerShmem->requests[n]; + request = &CheckpointerShmem->requests[read_idx]; slotmap = hash_search(htab, request, HASH_ENTER, &found); if (found) { /* Duplicate, so mark the previous occurrence as skippable */ - skip_slot[slotmap->slot] = true; + skip_slot[slotmap->ring_idx] = true; num_skipped++; } /* Remember slot containing latest occurrence of this request value */ - slotmap->slot = n; + slotmap->ring_idx = read_idx; + + /* Move to the next request in the ring buffer */ + read_idx = (read_idx + 1) % max_requests; } /* Done with the hash table. */ @@ -1347,17 +1379,34 @@ CompactCheckpointerRequestQueue(void) } /* We found some duplicates; remove them. 
*/ - preserve_count = 0; - for (n = 0; n < CheckpointerShmem->num_requests; n++) + read_idx = write_idx = head; + for (n = 0; n < num_requests; n++) { - if (skip_slot[n]) - continue; - CheckpointerShmem->requests[preserve_count++] = CheckpointerShmem->requests[n]; + /* If this slot is NOT skipped, keep it */ + if (!skip_slot[read_idx]) + { + /* If the read and write positions are different, copy the request */ + if (write_idx != read_idx) + CheckpointerShmem->requests[write_idx] = + CheckpointerShmem->requests[read_idx]; + + /* Advance the write position */ + write_idx = (write_idx + 1) % max_requests; + } + + read_idx = (read_idx + 1) % max_requests; } + + /* + * Update ring buffer state: head remains the same, tail moves, count + * decreases + */ + CheckpointerShmem->tail = write_idx; + CheckpointerShmem->num_requests -= num_skipped; + ereport(DEBUG1, (errmsg_internal("compacted fsync request queue from %d entries to %d entries", - CheckpointerShmem->num_requests, preserve_count))); - CheckpointerShmem->num_requests = preserve_count; + num_requests, CheckpointerShmem->num_requests))); /* Cleanup. */ pfree(skip_slot); @@ -1378,40 +1427,64 @@ AbsorbSyncRequests(void) { CheckpointerRequest *requests = NULL; CheckpointerRequest *request; - int n; + int n, + i; + bool loop; if (!AmCheckpointerProcess()) return; - LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); - - /* - * We try to avoid holding the lock for a long time by copying the request - * array, and processing the requests after releasing the lock. - * - * Once we have cleared the requests from shared memory, we have to PANIC - * if we then fail to absorb them (eg, because our hashtable runs out of - * memory). This is because the system cannot run safely if we are unable - * to fsync what we have been told to fsync. Fortunately, the hashtable - * is so small that the problem is quite unlikely to arise in practice. - */ - n = CheckpointerShmem->num_requests; - if (n > 0) + do { - requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest)); - memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest)); - } + LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); + + /*--- + * We try to avoid holding the lock for a long time by: + * 1. Copying the request array and processing the requests after + * releasing the lock; + * 2. Processing not the whole queue, but only batches of + * CKPT_REQ_BATCH_SIZE at once. + * + * Once we have cleared the requests from shared memory, we must + * PANIC if we then fail to absorb them (e.g., because our hashtable + * runs out of memory). This is because the system cannot run safely + * if we are unable to fsync what we have been told to fsync. + * Fortunately, the hashtable is so small that the problem is quite + * unlikely to arise in practice. + * + * Note: The maximum possible size of a ring buffer is + * MAX_CHECKPOINT_REQUESTS entries, which fit into a maximum palloc + * allocation size of 1Gb. Our maximum batch size, + * CKPT_REQ_BATCH_SIZE, is even smaller. 
+		 */
+		n = Min(CheckpointerShmem->num_requests, CKPT_REQ_BATCH_SIZE);
+		if (n > 0)
+		{
+			if (!requests)
+				requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));

-	START_CRIT_SECTION();
+			for (i = 0; i < n; i++)
+			{
+				requests[i] = CheckpointerShmem->requests[CheckpointerShmem->head];
+				CheckpointerShmem->head = (CheckpointerShmem->head + 1) % CheckpointerShmem->max_requests;
+			}

-	CheckpointerShmem->num_requests = 0;
+			CheckpointerShmem->num_requests -= n;

-	LWLockRelease(CheckpointerCommLock);
+
+		}
+
+		START_CRIT_SECTION();
+
+		/* Are there any requests in the queue? If so, keep going. */
+		loop = CheckpointerShmem->num_requests != 0;
+
+		LWLockRelease(CheckpointerCommLock);

-	for (request = requests; n > 0; request++, n--)
-		RememberSyncRequest(&request->ftag, request->type);
+		for (request = requests; n > 0; request++, n--)
+			RememberSyncRequest(&request->ftag, request->type);

-	END_CRIT_SECTION();
+		END_CRIT_SECTION();
+	} while (loop);

 	if (requests)
 		pfree(requests);

From 793928c2d5ac8e60e1e4054fa3b986369777896d Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Mon, 28 Jul 2025 08:15:11 +0900
Subject: [PATCH 21/67] Fix performance regression with flush of pending
 fixed-numbered stats

The callback added in fc415edf8ca8 to check if there is any pending
data to flush for fixed-numbered statistics, done by looping across all
the builtin and custom stats kinds with a call to have_fixed_pending_cb,
is proving to be able to show up in workloads that do not report any
stats (read-only, no function calls, no WAL, no IO, etc). The code used
in v17 was cheaper than what HEAD has introduced, relying on three
boolean checks for WAL, SLRU and IO stats.

This commit switches the code to use a more efficient approach than
fc415edf8ca8, with a single boolean flag that can be switched to "true"
by any fixed-numbered stats kinds to force pgstat_report_stat() to go
through one round of reports. The flag is reset by pgstat_report_stat()
once a full round of reports is done. The flag being false means that
fixed-numbered stats kinds saw no activity, and that there is no
pending data to flush.

ac000fca743e took one step in improving the performance by reducing the
number of stats kinds that the backend can hold. This commit takes a
more drastic step by bringing back the code efficiency to what it was
before v18 with a cheap check at the beginning of pgstat_report_stat()
for its fast-exit path.

The callback have_static_pending_cb is removed as an effect of all that.
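
For illustration, the fast-exit pattern this brings back can be
sketched as follows (a simplified outline of the pgstat.c changes in
the diff below; the actual flush work is elided):

    /* Set to true by any fixed-numbered stats kind with pending data. */
    bool pgstat_report_fixed = false;

    long
    pgstat_report_stat(bool force)
    {
        /* Fast exit: nothing pending, variable- or fixed-numbered. */
        if (dlist_is_empty(&pgStatPending) && !pgstat_report_fixed)
            return 0;

        /* ... flush variable-numbered pending entries ... */

        if (pgstat_report_fixed)
        {
            /* one round of flush_static_cb() for each stats kind */
        }

        pgstat_report_fixed = false;    /* reset only after a full round */
        return 0;
    }
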
Reported-by: Andres Freund Reviewed-by: Bertrand Drouvot Discussion: https://postgr.es/m/eb224uegsga2hgq7dfq3ps5cduhpqej7ir2hjxzzozjthrekx5@dysei6buqthe Backpatch-through: 18 --- src/backend/access/transam/xlog.c | 10 ++++ src/backend/utils/activity/pgstat.c | 52 ++++++++------------- src/backend/utils/activity/pgstat_backend.c | 14 +----- src/backend/utils/activity/pgstat_io.c | 10 +--- src/backend/utils/activity/pgstat_slru.c | 10 +--- src/backend/utils/activity/pgstat_wal.c | 20 ++++---- src/include/utils/pgstat_internal.h | 34 ++++++++------ 7 files changed, 62 insertions(+), 88 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index eefffc4277a1a..b0891998b243f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -96,6 +96,7 @@ #include "utils/guc_hooks.h" #include "utils/guc_tables.h" #include "utils/injection_point.h" +#include "utils/pgstat_internal.h" #include "utils/ps_status.h" #include "utils/relmapper.h" #include "utils/snapmgr.h" @@ -1091,6 +1092,9 @@ XLogInsertRecord(XLogRecData *rdata, pgWalUsage.wal_bytes += rechdr->xl_tot_len; pgWalUsage.wal_records++; pgWalUsage.wal_fpi += num_fpi; + + /* Required for the flush of pending stats WAL data */ + pgstat_report_fixed = true; } return EndPos; @@ -2108,6 +2112,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) LWLockRelease(WALWriteLock); pgWalUsage.wal_buffers_full++; TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE(); + + /* + * Required for the flush of pending stats WAL data, per + * update of pgWalUsage. + */ + pgstat_report_fixed = true; } } } diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 8b57845e8709f..6bc91ce0dadda 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -212,6 +212,11 @@ int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE; PgStat_LocalState pgStatLocal; +/* + * Track pending reports for fixed-numbered stats, used by + * pgstat_report_stat(). 
+ */ +bool pgstat_report_fixed = false; /* ---------- * Local data @@ -370,7 +375,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_off = offsetof(PgStatShared_Backend, stats), .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats), - .have_static_pending_cb = pgstat_backend_have_pending_cb, .flush_static_cb = pgstat_backend_flush_cb, .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, }, @@ -437,7 +441,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats), .flush_static_cb = pgstat_io_flush_cb, - .have_static_pending_cb = pgstat_io_have_pending_cb, .init_shmem_cb = pgstat_io_init_shmem_cb, .reset_all_cb = pgstat_io_reset_all_cb, .snapshot_cb = pgstat_io_snapshot_cb, @@ -455,7 +458,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats), .flush_static_cb = pgstat_slru_flush_cb, - .have_static_pending_cb = pgstat_slru_have_pending_cb, .init_shmem_cb = pgstat_slru_init_shmem_cb, .reset_all_cb = pgstat_slru_reset_all_cb, .snapshot_cb = pgstat_slru_snapshot_cb, @@ -474,7 +476,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .init_backend_cb = pgstat_wal_init_backend_cb, .flush_static_cb = pgstat_wal_flush_cb, - .have_static_pending_cb = pgstat_wal_have_pending_cb, .init_shmem_cb = pgstat_wal_init_shmem_cb, .reset_all_cb = pgstat_wal_reset_all_cb, .snapshot_cb = pgstat_wal_snapshot_cb, @@ -708,29 +709,10 @@ pgstat_report_stat(bool force) } /* Don't expend a clock check if nothing to do */ - if (dlist_is_empty(&pgStatPending)) + if (dlist_is_empty(&pgStatPending) && + !pgstat_report_fixed) { - bool do_flush = false; - - /* Check for pending stats */ - for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) - { - const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); - - if (!kind_info) - continue; - if (!kind_info->have_static_pending_cb) - continue; - - if (kind_info->have_static_pending_cb()) - { - do_flush = true; - break; - } - } - - if (!do_flush) - return 0; + return 0; } /* @@ -784,16 +766,19 @@ pgstat_report_stat(bool force) partial_flush |= pgstat_flush_pending_entries(nowait); /* flush of other stats kinds */ - for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) + if (pgstat_report_fixed) { - const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) + { + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); - if (!kind_info) - continue; - if (!kind_info->flush_static_cb) - continue; + if (!kind_info) + continue; + if (!kind_info->flush_static_cb) + continue; - partial_flush |= kind_info->flush_static_cb(nowait); + partial_flush |= kind_info->flush_static_cb(nowait); + } } last_flush = now; @@ -815,6 +800,7 @@ pgstat_report_stat(bool force) } pending_since = 0; + pgstat_report_fixed = false; return 0; } diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 51256277e8d37..8714a85e2d936 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -66,6 +66,7 @@ pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context, io_time); backend_has_iostats = true; + pgstat_report_fixed = true; } void @@ -81,6 +82,7 @@ pgstat_count_backend_io_op(IOObject io_object, 
IOContext io_context, PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes; backend_has_iostats = true; + pgstat_report_fixed = true; } /* @@ -301,18 +303,6 @@ pgstat_flush_backend(bool nowait, bits32 flags) return false; } -/* - * Check if there are any backend stats waiting for flush. - */ -bool -pgstat_backend_have_pending_cb(void) -{ - if (!pgstat_tracks_backend_bktype(MyBackendType)) - return false; - - return (backend_has_iostats || pgstat_backend_wal_have_pending()); -} - /* * Callback to flush out locally pending backend statistics. * diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index d8d26379a571e..13ae57ed6498d 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -80,6 +80,7 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, pgstat_count_backend_io_op(io_object, io_context, io_op, cnt, bytes); have_iostats = true; + pgstat_report_fixed = true; } /* @@ -167,15 +168,6 @@ pgstat_fetch_stat_io(void) return &pgStatLocal.snapshot.io; } -/* - * Check if there any IO stats waiting for flush. - */ -bool -pgstat_io_have_pending_cb(void) -{ - return have_iostats; -} - /* * Simpler wrapper of pgstat_io_flush_cb() */ diff --git a/src/backend/utils/activity/pgstat_slru.c b/src/backend/utils/activity/pgstat_slru.c index b9e940dde45b6..7bd8744accb0e 100644 --- a/src/backend/utils/activity/pgstat_slru.c +++ b/src/backend/utils/activity/pgstat_slru.c @@ -143,15 +143,6 @@ pgstat_get_slru_index(const char *name) return (SLRU_NUM_ELEMENTS - 1); } -/* - * Check if there are any SLRU stats entries waiting for flush. - */ -bool -pgstat_slru_have_pending_cb(void) -{ - return have_slrustats; -} - /* * Flush out locally pending SLRU stats entries * @@ -247,6 +238,7 @@ get_slru_entry(int slru_idx) Assert((slru_idx >= 0) && (slru_idx < SLRU_NUM_ELEMENTS)); have_slrustats = true; + pgstat_report_fixed = true; return &pending_SLRUStats[slru_idx]; } diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index 16a1ecb4d90d2..0d04480d2f6d0 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -71,6 +71,15 @@ pgstat_fetch_stat_wal(void) return &pgStatLocal.snapshot.wal; } +/* + * To determine whether WAL usage happened. + */ +static inline bool +pgstat_wal_have_pending(void) +{ + return pgWalUsage.wal_records != prevWalUsage.wal_records; +} + /* * Calculate how much WAL usage counters have increased by subtracting the * previous counters from the current ones. @@ -92,7 +101,7 @@ pgstat_wal_flush_cb(bool nowait) * This function can be called even if nothing at all has happened. Avoid * taking lock for nothing in that case. */ - if (!pgstat_wal_have_pending_cb()) + if (!pgstat_wal_have_pending()) return false; /* @@ -136,15 +145,6 @@ pgstat_wal_init_backend_cb(void) prevWalUsage = pgWalUsage; } -/* - * To determine whether WAL usage happened. - */ -bool -pgstat_wal_have_pending_cb(void) -{ - return pgWalUsage.wal_records != prevWalUsage.wal_records; -} - void pgstat_wal_init_shmem_cb(void *stats) { diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index d5557e6e998cd..6cf00008f6333 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -295,18 +295,11 @@ typedef struct PgStat_KindInfo * * Returns true if some of the stats could not be flushed, due to lock * contention for example. Optional. 
-	 */
-	bool		(*flush_static_cb) (bool nowait);
-
-	/*
-	 * For fixed-numbered or variable-numbered statistics: Check for pending
-	 * stats in need of flush with flush_static_cb, when these do not use
-	 * PgStat_EntryRef->pending.
 	 *
-	 * Returns true if there are any stats pending for flush, triggering
-	 * flush_static_cb. Optional.
+	 * "pgstat_report_fixed" needs to be set to trigger the flush of pending
+	 * stats.
 	 */
-	bool		(*have_static_pending_cb) (void);
+	bool		(*flush_static_cb) (bool nowait);

 	/*
 	 * For fixed-numbered statistics: Reset All.
@@ -627,7 +620,6 @@ extern void pgstat_archiver_snapshot_cb(void);

 extern bool pgstat_flush_backend(bool nowait, bits32 flags);
 extern bool pgstat_backend_flush_cb(bool nowait);
-extern bool pgstat_backend_have_pending_cb(void);
 extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header,
 											  TimestampTz ts);

@@ -676,7 +668,6 @@ extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);

 extern void pgstat_flush_io(bool nowait);
-extern bool pgstat_io_have_pending_cb(void);
 extern bool pgstat_io_flush_cb(bool nowait);
 extern void pgstat_io_init_shmem_cb(void *stats);
 extern void pgstat_io_reset_all_cb(TimestampTz ts);
@@ -738,7 +729,6 @@ extern PgStatShared_Common *pgstat_init_entry(PgStat_Kind kind,
  * Functions in pgstat_slru.c
  */

-extern bool pgstat_slru_have_pending_cb(void);
 extern bool pgstat_slru_flush_cb(bool nowait);
 extern void pgstat_slru_init_shmem_cb(void *stats);
 extern void pgstat_slru_reset_all_cb(TimestampTz ts);
@@ -750,7 +740,6 @@ extern void pgstat_slru_snapshot_cb(void);
 */

 extern void pgstat_wal_init_backend_cb(void);
-extern bool pgstat_wal_have_pending_cb(void);
 extern bool pgstat_wal_flush_cb(bool nowait);
 extern void pgstat_wal_init_shmem_cb(void *stats);
 extern void pgstat_wal_reset_all_cb(TimestampTz ts);
@@ -778,8 +767,23 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, uint64 obji
 * Variables in pgstat.c
 */

-extern PGDLLIMPORT PgStat_LocalState pgStatLocal;
+/*
+ * Track if *any* pending fixed-numbered statistics should be flushed to
+ * shared memory.
+ *
+ * This flag can be switched to true by fixed-numbered statistics to let
+ * pgstat_report_stat() know if it needs to go through one round of
+ * reports, calling flush_static_cb for each fixed-numbered statistics
+ * kind. When this flag is not set, pgstat_report_stat() is able to do
+ * a fast exit, knowing that there are no pending fixed-numbered statistics.
+ *
+ * Statistics callbacks should never reset this flag; pgstat_report_stat()
+ * is in charge of doing that.
+ */
+extern PGDLLIMPORT bool pgstat_report_fixed;

+/* Backend-local stats state */
+extern PGDLLIMPORT PgStat_LocalState pgStatLocal;

 /*
  * Implementation of inline functions declared above.

From 3151c264d460c0be09131ce90529073631d70ae8 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Mon, 28 Jul 2025 08:38:24 +0900
Subject: [PATCH 22/67] ecpg: Fix memory leaks in ecpg_auto_prepare()

This routine includes three code paths that can fail, with the
allocated prepared statement name going out of scope.

Per report from Coverity. Oversight in commit a6eabec6808c, which
changed the order of some ecpg_strdup() calls in this code path.
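
In miniature, the bug is the classic leak-on-early-return pattern; a
hypothetical, self-contained sketch (not the actual ecpg code):

    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>

    static bool
    prepare_name(char **name)
    {
        *name = strdup("stmt_1");   /* fresh allocation handed to caller */
        if (*name == NULL)
            return false;

        bool failed = true;         /* stands in for ECPGprepare() failing */

        if (failed)
        {
            free(*name);            /* the fix: release before returning */
            *name = NULL;
            return false;
        }
        return true;
    }
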
---
 src/interfaces/ecpg/ecpglib/prepare.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/interfaces/ecpg/ecpglib/prepare.c b/src/interfaces/ecpg/ecpglib/prepare.c
index dd6fd1fe7f407..06f0135813b37 100644
--- a/src/interfaces/ecpg/ecpglib/prepare.c
+++ b/src/interfaces/ecpg/ecpglib/prepare.c
@@ -603,7 +603,10 @@ ecpg_auto_prepare(int lineno, const char *connection_name, const int compat, cha
 		prep = ecpg_find_prepared_statement(stmtID, con, NULL);
 		/* This prepared name doesn't exist on this connection. */
 		if (!prep && !prepare_common(lineno, con, stmtID, query))
+		{
+			ecpg_free(*name);
 			return false;
+		}

 	}
 	else
@@ -619,11 +622,17 @@ ecpg_auto_prepare(int lineno, const char *connection_name, const int compat, cha
 			return false;

 		if (!ECPGprepare(lineno, connection_name, 0, stmtID, query))
+		{
+			ecpg_free(*name);
 			return false;
+		}

 		entNo = AddStmtToCache(lineno, stmtID, connection_name, compat, query);
 		if (entNo < 0)
+		{
+			ecpg_free(*name);
 			return false;
+		}
 	}

 	/* increase usage counter */

From dcc9820a3526eb8d89c5da75dad32b8ef4bf8545 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Thu, 24 Jul 2025 13:30:43 -0400
Subject: [PATCH 23/67] Avoid throwing away the error message in
 syncrep_yyerror.

Commit 473a575e05979b4dbb28b3f2544f4ec8f184ce65 purported to make this
function stash the error message in *syncrep_parse_error_msg_p, but it
didn't actually. As a result, an attempt to set
synchronous_standby_names to any value that does not parse resulted in
a generic "parser failed." message rather than anything more specific.
This fixes that.

Discussion: http://postgr.es/m/CA+TgmoYF9wPNZ-Q_EMfib_espgHycY-eX__6Tzo2GpYpVXqCdQ@mail.gmail.com
Backpatch-through: 18
---
 src/backend/replication/syncrep_scanner.l | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/backend/replication/syncrep_scanner.l b/src/backend/replication/syncrep_scanner.l
index 7dec1f869c745..02004d621e73d 100644
--- a/src/backend/replication/syncrep_scanner.l
+++ b/src/backend/replication/syncrep_scanner.l
@@ -157,17 +157,16 @@ syncrep_yyerror(SyncRepConfigData **syncrep_parse_result_p, char **syncrep_parse
 {
 	struct yyguts_t *yyg = (struct yyguts_t *) yyscanner;	/* needed for yytext
 															 * macro */
-	char	   *syncrep_parse_error_msg = *syncrep_parse_error_msg_p;

 	/* report only the first error in a parse operation */
-	if (syncrep_parse_error_msg)
+	if (*syncrep_parse_error_msg_p)
 		return;
 	if (yytext[0])
-		syncrep_parse_error_msg = psprintf("%s at or near \"%s\"",
-										   message, yytext);
+		*syncrep_parse_error_msg_p = psprintf("%s at or near \"%s\"",
+											  message, yytext);
 	else
-		syncrep_parse_error_msg = psprintf("%s at end of input",
-										   message);
+		*syncrep_parse_error_msg_p = psprintf("%s at end of input",
+											  message);
 }

 void

From d5b9b2d40262f57f58322ad49f8928fd4a492adb Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 28 Jul 2025 11:15:47 -0400
Subject: [PATCH 24/67] Remove misleading hint for "unexpected data beyond EOF"
 error.

Commit ffae5cc5a6024b4e25ec920ed5c4dfac649605f8 added this hint in 2006,
but it's now obsolete and doesn't reflect what users should really check
in this situation. We were not able to agree on a new hint, so just
delete the existing one and update the comments to mention one
possibility that is known to cause problems of this kind: something
other than PostgreSQL is modifying files in the PostgreSQL data
directory.
Author: Jakub Wartak Reviewed-by: Robert Haas Reviewed-by: Andres Freund Reviewed-by: Christoph Berg Discussion: https://postgr.es/m/CAKZiRmxNbcaL76x=09Sxf7aUmrRQJBf8drzDdUHo+j9_eM+VMg@mail.gmail.com --- src/backend/storage/buffer/bufmgr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6afdd28dba6f2..9c6fe587ec940 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2743,11 +2743,9 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * because mdread doesn't complain about reads beyond EOF (when * zero_damaged_pages is ON) and so a previous attempt to read a block * beyond EOF could have left a "valid" zero-filled buffer. - * Unfortunately, we have also seen this case occurring because of - * buggy Linux kernels that sometimes return an lseek(SEEK_END) result - * that doesn't account for a recent write. In that situation, the - * pre-existing buffer would contain valid data that we don't want to - * overwrite. Since the legitimate cases should always have left a + * + * This has also been observed when relation was overwritten by external + * process. Since the legitimate cases should always have left a * zero-filled buffer, complain if not PageIsNew. */ if (existing_id >= 0) @@ -2778,8 +2776,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, ereport(ERROR, (errmsg("unexpected data beyond EOF in block %u of relation %s", existing_hdr->tag.blockNum, - relpath(bmr.smgr->smgr_rlocator, fork).str), - errhint("This has been seen to occur with buggy kernels; consider updating your system."))); + relpath(bmr.smgr->smgr_rlocator, fork).str))); /* * We *must* do smgr[zero]extend before succeeding, else the page From 71c0921b649d7a800eb2d6f93539890eaa14d979 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 28 Jul 2025 16:50:41 -0400 Subject: [PATCH 25/67] Avoid regression in the size of XML input that we will accept. This mostly reverts commit 6082b3d5d, "Use xmlParseInNodeContext not xmlParseBalancedChunkMemory". It turns out that xmlParseInNodeContext will reject text chunks exceeding 10MB, while (in most libxml2 versions) xmlParseBalancedChunkMemory will not. The bleeding-edge libxml2 bug that we needed to work around a year ago is presumably no longer a factor, and the argument that xmlParseBalancedChunkMemory is semi-deprecated is not enough to justify a functionality regression. Hence, go back to doing it the old way. Reported-by: Michael Paquier Author: Michael Paquier Co-authored-by: Erik Wienhold Reviewed-by: Tom Lane Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz Backpatch-through: 13 --- src/backend/utils/adt/xml.c | 68 ++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index f7b731825fca0..3379d3922606a 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1769,7 +1769,7 @@ xml_doctype_in_content(const xmlChar *str) * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode). * * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list - * of parsed nodes from the xmlParseInNodeContext call will be returned + * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned * to *parsed_nodes. (It is caller's responsibility to free that.) 
* * Errors normally result in ereport(ERROR), but if escontext is an @@ -1795,6 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, PgXmlErrorContext *xmlerrcxt; volatile xmlParserCtxtPtr ctxt = NULL; volatile xmlDocPtr doc = NULL; + volatile int save_keep_blanks = -1; /* * This step looks annoyingly redundant, but we must do it to have a @@ -1822,7 +1823,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, PG_TRY(); { bool parse_as_document = false; - int options; int res_code; size_t count = 0; xmlChar *version = NULL; @@ -1853,18 +1853,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, parse_as_document = true; } - /* - * Select parse options. - * - * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR) - * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by - * internal DTD are applied'. As for external DTDs, we try to support - * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really - * happen because xmlPgEntityLoader prevents it. - */ - options = XML_PARSE_NOENT | XML_PARSE_DTDATTR - | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS); - /* initialize output parameters */ if (parsed_xmloptiontype != NULL) *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT : @@ -1874,11 +1862,26 @@ xml_parse(text *data, XmlOptionType xmloption_arg, if (parse_as_document) { + int options; + + /* set up parser context used by xmlCtxtReadDoc */ ctxt = xmlNewParserCtxt(); if (ctxt == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); + /* + * Select parse options. + * + * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR) + * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined + * by internal DTD are applied'. As for external DTDs, we try to + * support them too (see SQL/XML:2008 GR 10.16.7.e), but that + * doesn't really happen because xmlPgEntityLoader prevents it. + */ + options = XML_PARSE_NOENT | XML_PARSE_DTDATTR + | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS); + doc = xmlCtxtReadDoc(ctxt, utf8string, NULL, /* no URL */ "UTF-8", @@ -1900,10 +1903,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, } else { - xmlNodePtr root; - xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY; - - /* set up document with empty root node to be the context node */ + /* set up document that xmlParseBalancedChunkMemory will add to */ doc = xmlNewDoc(version); if (doc == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, @@ -1916,36 +1916,23 @@ xml_parse(text *data, XmlOptionType xmloption_arg, "could not allocate XML document"); doc->standalone = standalone; - root = xmlNewNode(NULL, (const xmlChar *) "content-root"); - if (root == NULL || xmlerrcxt->err_occurred) - xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate xml node"); - - /* - * This attaches root to doc, so we need not free it separately; - * and there can't yet be any old root to free. - */ - oldroot = xmlDocSetRootElement(doc, root); - Assert(oldroot == NULL); + /* set parse options --- have to do this the ugly way */ + save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 
1 : 0); /* allow empty content */ if (*(utf8string + count)) { xmlNodePtr node_list = NULL; - xmlParserErrors res; - - res = xmlParseInNodeContext(root, - (char *) utf8string + count, - strlen((char *) utf8string + count), - options, - &node_list); - if (res != XML_ERR_OK || xmlerrcxt->err_occurred) + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, + utf8string + count, + &node_list); + if (res_code != 0 || xmlerrcxt->err_occurred) { - xmlFreeNodeList(node_list); xml_errsave(escontext, xmlerrcxt, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); + xmlFreeNodeList(node_list); goto fail; } @@ -1961,6 +1948,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, } PG_CATCH(); { + if (save_keep_blanks != -1) + xmlKeepBlanksDefault(save_keep_blanks); if (doc != NULL) xmlFreeDoc(doc); if (ctxt != NULL) @@ -1972,6 +1961,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, } PG_END_TRY(); + if (save_keep_blanks != -1) + xmlKeepBlanksDefault(save_keep_blanks); + if (ctxt != NULL) xmlFreeParserCtxt(ctxt); From 4bc62b86849065939a6b85273fece6b92d6e97bf Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 29 Jul 2025 15:18:01 +1200 Subject: [PATCH 26/67] Display Memoize planner estimates in EXPLAIN There've been a few complaints that it can be overly difficult to figure out why the planner picked a Memoize plan. To help address that, here we adjust the EXPLAIN output to display the following additional details: 1) The estimated number of cache entries that can be stored at once 2) The estimated number of unique lookup keys that we expect to see 3) The number of lookups we expect 4) The estimated hit ratio Technically #4 can be calculated using #1, #2 and #3, but it's not a particularly obvious calculation, so we opt to display it explicitly. The original patch by Lukas Fittl only displayed the hit ratio, but there was a fear that might lead to more questions about how that was calculated. The idea with displaying all 4 is to be transparent which may allow queries to be tuned more easily. For example, if #2 isn't correct then maybe extended statistics or a manual n_distinct estimate can be used to help fix poor plan choices. 
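For reference, the hit percent in #4 comes from the formula in
cost_memoize_rescan() (see the costsize.c hunk below).  A standalone C
sketch of that arithmetic, with made-up inputs:

    #include <stdio.h>

    #define Max(a, b)	((a) > (b) ? (a) : (b))

    /* Mirrors the hit-ratio formula in cost_memoize_rescan(): the first
     * lookup of each distinct key is always a miss, and hits are scaled
     * down further when not all distinct keys fit in the cache at once. */
    static double
    est_hit_ratio(double est_calls, double ndistinct, double est_cache_entries)
    {
        return ((est_calls - ndistinct) / est_calls) *
            (est_cache_entries / Max(ndistinct, est_cache_entries));
    }

    int
    main(void)
    {
        /* made-up inputs: 1000 rescans, 200 distinct keys, room for 150 */
        printf("hit percent: %.2f%%\n",
               est_hit_ratio(1000.0, 200.0, 150.0) * 100.0);	/* 60.00% */
        return 0;
    }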
Author: Ilia Evdokimov Author: Lukas Fittl Reviewed-by: David Rowley Reviewed-by: Andrei Lepikhov Reviewed-by: Robert Haas Discussion: https://postgr.es/m/CAP53Pky29GWAVVk3oBgKBDqhND0BRBN6yTPeguV_qSivFL5N_g%40mail.gmail.com --- src/backend/commands/explain.c | 21 ++++++++++++++++++++- src/backend/optimizer/path/costsize.c | 18 ++++++++++++------ src/backend/optimizer/plan/createplan.c | 15 ++++++++++++--- src/backend/optimizer/util/pathnode.c | 11 ++++++++--- src/include/nodes/pathnodes.h | 4 +++- src/include/nodes/plannodes.h | 10 ++++++++++ src/include/optimizer/pathnode.h | 2 +- 7 files changed, 66 insertions(+), 15 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 7e2792ead715b..8345bc0264b23 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3582,6 +3582,7 @@ static void show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) { Plan *plan = ((PlanState *) mstate)->plan; + Memoize *mplan = (Memoize *) plan; ListCell *lc; List *context; StringInfoData keystr; @@ -3602,7 +3603,7 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) plan, ancestors); - foreach(lc, ((Memoize *) plan)->param_exprs) + foreach(lc, mplan->param_exprs) { Node *expr = (Node *) lfirst(lc); @@ -3618,6 +3619,24 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) pfree(keystr.data); + if (es->costs) + { + if (es->format == EXPLAIN_FORMAT_TEXT) + { + ExplainIndentText(es); + appendStringInfo(es->str, "Estimates: capacity=%u distinct keys=%.0f lookups=%.0f hit percent=%.2f%%\n", + mplan->est_entries, mplan->est_unique_keys, + mplan->est_calls, mplan->est_hit_ratio * 100.0); + } + else + { + ExplainPropertyUInteger("Estimated Capacity", NULL, mplan->est_entries, es); + ExplainPropertyFloat("Estimated Distinct Lookup Keys", NULL, mplan->est_unique_keys, 0, es); + ExplainPropertyFloat("Estimated Lookups", NULL, mplan->est_calls, 0, es); + ExplainPropertyFloat("Estimated Hit Percent", NULL, mplan->est_hit_ratio * 100.0, 2, es); + } + } + if (!es->analyze) return; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1f04a2c182ca9..344a3188317b1 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2572,13 +2572,13 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, Cost input_startup_cost = mpath->subpath->startup_cost; Cost input_total_cost = mpath->subpath->total_cost; double tuples = mpath->subpath->rows; - double calls = mpath->calls; + Cardinality est_calls = mpath->est_calls; int width = mpath->subpath->pathtarget->width; double hash_mem_bytes; double est_entry_bytes; - double est_cache_entries; - double ndistinct; + Cardinality est_cache_entries; + Cardinality ndistinct; double evict_ratio; double hit_ratio; Cost startup_cost; @@ -2604,7 +2604,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, est_cache_entries = floor(hash_mem_bytes / est_entry_bytes); /* estimate on the distinct number of parameter values */ - ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL, + ndistinct = estimate_num_groups(root, mpath->param_exprs, est_calls, NULL, &estinfo); /* @@ -2616,7 +2616,10 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, * certainly mean a MemoizePath will never survive add_path(). 
*/ if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0) - ndistinct = calls; + ndistinct = est_calls; + + /* Remember the ndistinct estimate for EXPLAIN */ + mpath->est_unique_keys = ndistinct; /* * Since we've already estimated the maximum number of entries we can @@ -2644,9 +2647,12 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, * must look at how many scans are estimated in total for this node and * how many of those scans we expect to get a cache hit. */ - hit_ratio = ((calls - ndistinct) / calls) * + hit_ratio = ((est_calls - ndistinct) / est_calls) * (est_cache_entries / Max(ndistinct, est_cache_entries)); + /* Remember the hit ratio estimate for EXPLAIN */ + mpath->est_hit_ratio = hit_ratio; + Assert(hit_ratio >= 0 && hit_ratio <= 1.0); /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 8a9f1d7a943a8..bfefc7dbea106 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -284,7 +284,10 @@ static Material *make_material(Plan *lefttree); static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, List *param_exprs, bool singlerow, bool binary_mode, - uint32 est_entries, Bitmapset *keyparamids); + uint32 est_entries, Bitmapset *keyparamids, + Cardinality est_calls, + Cardinality est_unique_keys, + double est_hit_ratio); static WindowAgg *make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, @@ -1753,7 +1756,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags) plan = make_memoize(subplan, operators, collations, param_exprs, best_path->singlerow, best_path->binary_mode, - best_path->est_entries, keyparamids); + best_path->est_entries, keyparamids, best_path->est_calls, + best_path->est_unique_keys, best_path->est_hit_ratio); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6749,7 +6753,9 @@ materialize_finished_plan(Plan *subplan) static Memoize * make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, List *param_exprs, bool singlerow, bool binary_mode, - uint32 est_entries, Bitmapset *keyparamids) + uint32 est_entries, Bitmapset *keyparamids, + Cardinality est_calls, Cardinality est_unique_keys, + double est_hit_ratio) { Memoize *node = makeNode(Memoize); Plan *plan = &node->plan; @@ -6767,6 +6773,9 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, node->binary_mode = binary_mode; node->est_entries = est_entries; node->keyparamids = keyparamids; + node->est_calls = est_calls; + node->est_unique_keys = est_unique_keys; + node->est_hit_ratio = est_hit_ratio; return node; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 9cc602788eaae..a4c5867cdcb84 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1689,7 +1689,7 @@ create_material_path(RelOptInfo *rel, Path *subpath) MemoizePath * create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *param_exprs, List *hash_operators, - bool singlerow, bool binary_mode, double calls) + bool singlerow, bool binary_mode, Cardinality est_calls) { MemoizePath *pathnode = makeNode(MemoizePath); @@ -1710,7 +1710,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->param_exprs = param_exprs; pathnode->singlerow = singlerow; pathnode->binary_mode = binary_mode; - 
pathnode->calls = clamp_row_est(calls); /* * For now we set est_entries to 0. cost_memoize_rescan() does all the @@ -1720,6 +1719,12 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, */ pathnode->est_entries = 0; + pathnode->est_calls = clamp_row_est(est_calls); + + /* These will also be set later in cost_memoize_rescan() */ + pathnode->est_unique_keys = 0.0; + pathnode->est_hit_ratio = 0.0; + /* we should not generate this path type when enable_memoize=false */ Assert(enable_memoize); pathnode->path.disabled_nodes = subpath->disabled_nodes; @@ -4259,7 +4264,7 @@ reparameterize_path(PlannerInfo *root, Path *path, mpath->hash_operators, mpath->singlerow, mpath->binary_mode, - mpath->calls); + mpath->est_calls); } default: break; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index e5dd15098f635..ad2726f026f7d 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2133,10 +2133,12 @@ typedef struct MemoizePath * complete after caching the first record. */ bool binary_mode; /* true when cache key should be compared bit * by bit, false when using hash equality ops */ - Cardinality calls; /* expected number of rescans */ uint32 est_entries; /* The maximum number of entries that the * planner expects will fit in the cache, or 0 * if unknown */ + Cardinality est_calls; /* expected number of rescans */ + Cardinality est_unique_keys; /* estimated unique keys, for EXPLAIN */ + double est_hit_ratio; /* estimated cache hit ratio, for EXPLAIN */ } MemoizePath; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 46e2e09ea35be..6d8e1e99db3bd 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1073,6 +1073,16 @@ typedef struct Memoize /* paramids from param_exprs */ Bitmapset *keyparamids; + + /* Estimated number of rescans, for EXPLAIN */ + Cardinality est_calls; + + /* Estimated number of distinct lookup keys, for EXPLAIN */ + Cardinality est_unique_keys; + + /* Estimated cache hit ratio, for EXPLAIN */ + double est_hit_ratio; + } Memoize; /* ---------------- diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 60dcdb77e41be..58936e963cb6b 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -90,7 +90,7 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root, List *hash_operators, bool singlerow, bool binary_mode, - double calls); + Cardinality est_calls); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo); extern GatherPath *create_gather_path(PlannerInfo *root, From c2c2c7e225669e81f83a5db3f0f57131cdaa4a2d Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 29 Jul 2025 10:41:13 +0300 Subject: [PATCH 27/67] Clarify documentation for the initcap function This commit documents differences in the definition of word separators for the initcap function between libc and ICU locale providers. Backpatch to all supported branches. 
Discussion: https://postgr.es/m/804cc10ef95d4d3b298e76b181fd9437%40postgrespro.ru
Author: Oleg Tselebrovskiy
Backpatch-through: 13
---
 doc/src/sgml/func.sgml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index de5b5929ee078..74a16af04ad3b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3148,8 +3148,11 @@ SELECT NOT(ROW(table.*) IS NOT NULL) FROM TABLE; -- detect at least one null in
        Converts the first letter of each word to upper case and the
-        rest to lower case. Words are sequences of alphanumeric
-        characters separated by non-alphanumeric characters.
+        rest to lower case. When using the libc locale
+        provider, words are sequences of alphanumeric characters separated
+        by non-alphanumeric characters; when using the ICU locale provider,
+        words are separated according to
+        Unicode Standard Annex #29.
 
        initcap('hi THOMAS')

From cb833c1b6d19507b13a1a852feea4dbe5d6f0c20 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Tue, 29 Jul 2025 17:03:07 +0900
Subject: [PATCH 28/67] Handle timeout in PostgreSQL::Test::Cluster::is_alive()

This commit adds an extra --timeout=PG_TEST_TIMEOUT_DEFAULT to the call
of pg_isready done in is_alive(), so that the check has more leeway on
machines constrained on resources.  By default the timeout is 180s, and
it can be changed depending on the environment where the tests are run.

Per buildfarm member mamba, where the default timeout of 3s used by
pg_isready has proved to be not enough: the postmaster may not have the
time it needs to reply to a ping request.

Reported-by: Alexander Lakhin
Reviewed-by: Nazir Bilal Yavuz
Discussion: https://postgr.es/m/29b637df-f818-4b52-986a-f11ba28300e9@gmail.com
---
 src/test/perl/PostgreSQL/Test/Cluster.pm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 61f68e0cc2e51..35413f140198b 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -304,6 +304,7 @@ sub is_alive
 
 	my $ret = PostgreSQL::Test::Utils::system_log(
 		'pg_isready',
+		'--timeout' => $PostgreSQL::Test::Utils::timeout_default,
 		'--host' => $self->host,
 		'--port' => $self->port);
 
From cc321b1d1c55fe208a394b0f8e0e99c5fb91742c Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Tue, 29 Jul 2025 19:43:10 +0900
Subject: [PATCH 29/67] Add regression test for background worker restart
 after crash.

Previously, if a background worker crashed and the server restarted with
restart_after_crash enabled, the worker was not restarted as expected.
This issue was fixed by commit b5d084c5353, which ensures that background
workers without the never-restart flag are correctly restarted after a
crash-and-restart cycle.

To guard against regressions, this commit adds a test that verifies a
background worker successfully restarts in such a scenario.
Author: Fujii Masao Reviewed-by: ChangAo Chen Discussion: https://postgr.es/m/CAHGQGwHF-PdUOgiXCH_8K5qBm8b13h0Qt=dSoFXZybXQdbf-tw@mail.gmail.com --- src/test/recovery/t/013_crash_restart.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl index debfa635c36fe..4c5af018ee44e 100644 --- a/src/test/recovery/t/013_crash_restart.pl +++ b/src/test/recovery/t/013_crash_restart.pl @@ -228,6 +228,13 @@ 'before-orderly-restart', 'can still write after crash restart'); +# Confirm that the logical replication launcher, a background worker +# without the never-restart flag, has also restarted successfully. +is($node->poll_query_until('postgres', + "SELECT count(*) = 1 FROM pg_stat_activity WHERE backend_type = 'logical replication launcher'"), + '1', + 'logical replication launcher restarted after crash'); + # Just to be sure, check that an orderly restart now still works $node->restart(); From 1d1612aec7688139e1a5506df1366b4b6a69605d Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 29 Jul 2025 09:09:42 -0400 Subject: [PATCH 30/67] Run pgindent. Per buildfarm member koel, Nathan Bossart, and David Rowley. --- src/backend/storage/buffer/bufmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 9c6fe587ec940..67431208e7f5f 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -2744,9 +2744,9 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, * zero_damaged_pages is ON) and so a previous attempt to read a block * beyond EOF could have left a "valid" zero-filled buffer. * - * This has also been observed when relation was overwritten by external - * process. Since the legitimate cases should always have left a - * zero-filled buffer, complain if not PageIsNew. + * This has also been observed when relation was overwritten by + * external process. Since the legitimate cases should always have + * left a zero-filled buffer, complain if not PageIsNew. */ if (existing_id >= 0) { From b9ebb92bcb6e0db111deacfbc14f470ce1b3ed8d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 09:42:22 -0400 Subject: [PATCH 31/67] Suppress uninitialized-variable warning. In the wake of commit 80aa9848b, a few compilers think that postgresAcquireSampleRowsFunc's "reltuples" might be used uninitialized. The logic is visibly correct, both before and after that change; presumably what happened here is that the previous presence of a setjmp() in the function stopped them from attempting any flow analysis at all. Add a dummy initialization to silence the warning. 
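As a toy-scale C illustration of this class of warning (not the
postgres_fdw code; whether a given compiler warns at all depends on its
version and flags), the dummy-initialization idiom looks like this:

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf env;

    int
    main(void)
    {
        double	reltuples = -1.0;	/* dummy initialization */
        int		ok = 1;

        /* The mere presence of setjmp() can make a compiler skip the
         * flow analysis that would prove the assignments below cover
         * every path reaching the printf(). */
        if (setjmp(env) != 0)
            return 1;			/* error path; reltuples never read */

        if (ok)
            reltuples = 42.0;
        else
            reltuples = 0.0;

        printf("%g\n", reltuples);
        return 0;
    }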
Reported-by: Ashutosh Bapat Author: Tom Lane Discussion: https://postgr.es/m/CAExHW5tkerCufA_F6oct5dMJ61N+yVrVgYXL7M8dD-5_zXjrDw@mail.gmail.com --- contrib/postgres_fdw/postgres_fdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 25b287be069fa..9d266b3e2b120 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -5018,7 +5018,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, int server_version_num; PgFdwSamplingMethod method = ANALYZE_SAMPLE_AUTO; /* auto is default */ double sample_frac = -1.0; - double reltuples; + double reltuples = -1.0; unsigned int cursor_number; StringInfoData sql; PGresult *res; From 74e121c8dc5184318478dee587cf7d8303ab1357 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 10:35:01 -0400 Subject: [PATCH 32/67] Split up pgfdw_report_error so that we can mark it pg_noreturn. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pgfdw_report_error has the same design fault as elog/ereport do, namely that it might or might not return depending on elevel. While those functions are too widely used to redesign, there are only about 30 call sites for pgfdw_report_error, and it's not exposed for extension use. So let's rethink it. Split it into pgfdw_report_error() which hard-wires ERROR elevel and is marked pg_noreturn, and pgfdw_report() which allows only elevels less than ERROR. (Thanks to Álvaro Herrera for suggesting this naming.) The motivation for doing this now is that in the wake of commit 80aa9848b, which removed a bunch of PG_TRYs from postgres_fdw, we're seeing more thorough flow analysis there from C compilers and Coverity. Marking pgfdw_report_error as noreturn where appropriate should help prevent false-positive complaints. We could alternatively have invented a macro wrapper similar to what we use for elog/ereport, but that code is sufficiently fragile that I didn't find it appetizing to make another copy. Since 80aa9848b already changed pgfdw_report_error's signature, this won't make back-patching any harder than it was already. Author: Tom Lane Discussion: https://postgr.es/m/420221.1753714491@sss.pgh.pa.us --- contrib/postgres_fdw/connection.c | 39 ++++++++++++++++++------- contrib/postgres_fdw/postgres_fdw.c | 44 ++++++++++++++--------------- contrib/postgres_fdw/postgres_fdw.h | 6 ++-- 3 files changed, 55 insertions(+), 34 deletions(-) diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index a33843fcf8531..e8148f2c5a223 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -142,6 +142,8 @@ static void do_sql_command_begin(PGconn *conn, const char *sql); static void do_sql_command_end(PGconn *conn, const char *sql, bool consume_input); static void begin_remote_xact(ConnCacheEntry *entry); +static void pgfdw_report_internal(int elevel, PGresult *res, PGconn *conn, + const char *sql); static void pgfdw_xact_callback(XactEvent event, void *arg); static void pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid, @@ -815,7 +817,7 @@ static void do_sql_command_begin(PGconn *conn, const char *sql) { if (!PQsendQuery(conn, sql)) - pgfdw_report_error(ERROR, NULL, conn, sql); + pgfdw_report_error(NULL, conn, sql); } static void @@ -830,10 +832,10 @@ do_sql_command_end(PGconn *conn, const char *sql, bool consume_input) * would be large compared to the overhead of PQconsumeInput.) 
*/ if (consume_input && !PQconsumeInput(conn)) - pgfdw_report_error(ERROR, NULL, conn, sql); + pgfdw_report_error(NULL, conn, sql); res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql); + pgfdw_report_error(res, conn, sql); PQclear(res); } @@ -966,7 +968,10 @@ pgfdw_get_result(PGconn *conn) /* * Report an error we got from the remote server. * - * elevel: error level to use (typically ERROR, but might be less) + * Callers should use pgfdw_report_error() to throw an error, or use + * pgfdw_report() for lesser message levels. (We make this distinction + * so that pgfdw_report_error() can be marked noreturn.) + * * res: PGresult containing the error (might be NULL) * conn: connection we did the query on * sql: NULL, or text of remote command we tried to execute @@ -979,8 +984,22 @@ pgfdw_get_result(PGconn *conn) * marked with have_error = true. */ void -pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - const char *sql) +pgfdw_report_error(PGresult *res, PGconn *conn, const char *sql) +{ + pgfdw_report_internal(ERROR, res, conn, sql); + pg_unreachable(); +} + +void +pgfdw_report(int elevel, PGresult *res, PGconn *conn, const char *sql) +{ + Assert(elevel < ERROR); /* use pgfdw_report_error for that */ + pgfdw_report_internal(elevel, res, conn, sql); +} + +static void +pgfdw_report_internal(int elevel, PGresult *res, PGconn *conn, + const char *sql) { char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY); @@ -1538,7 +1557,7 @@ pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query) */ if (!PQsendQuery(conn, query)) { - pgfdw_report_error(WARNING, NULL, conn, query); + pgfdw_report(WARNING, NULL, conn, query); return false; } @@ -1563,7 +1582,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, */ if (consume_input && !PQconsumeInput(conn)) { - pgfdw_report_error(WARNING, NULL, conn, query); + pgfdw_report(WARNING, NULL, conn, query); return false; } @@ -1575,7 +1594,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, (errmsg("could not get query result due to timeout"), errcontext("remote SQL command: %s", query))); else - pgfdw_report_error(WARNING, NULL, conn, query); + pgfdw_report(WARNING, NULL, conn, query); return false; } @@ -1583,7 +1602,7 @@ pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query, /* Issue a warning if not successful. */ if (PQresultStatus(result) != PGRES_COMMAND_OK) { - pgfdw_report_error(WARNING, result, conn, query); + pgfdw_report(WARNING, result, conn, query); return ignore_errors; } PQclear(result); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 9d266b3e2b120..456b267f70b5b 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -1704,7 +1704,7 @@ postgresReScanForeignScan(ForeignScanState *node) res = pgfdw_exec_query(fsstate->conn, sql, fsstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fsstate->conn, sql); + pgfdw_report_error(res, fsstate->conn, sql); PQclear(res); /* Now force a fresh FETCH. */ @@ -3614,7 +3614,7 @@ get_remote_estimate(const char *sql, PGconn *conn, */ res = pgfdw_exec_query(conn, sql, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql); + pgfdw_report_error(res, conn, sql); /* * Extract cost numbers for topmost plan node. 
Note we search for a left @@ -3769,14 +3769,14 @@ create_cursor(ForeignScanState *node) */ if (!PQsendQueryParams(conn, buf.data, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, conn, buf.data); + pgfdw_report_error(NULL, conn, buf.data); /* * Get the result, and check for success. */ res = pgfdw_get_result(conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + pgfdw_report_error(res, conn, fsstate->query); PQclear(res); /* Mark the cursor as created, and show no tuples have been retrieved */ @@ -3823,7 +3823,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_get_result(conn); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + pgfdw_report_error(res, conn, fsstate->query); /* Reset per-connection state */ fsstate->conn_state->pendingAreq = NULL; @@ -3839,7 +3839,7 @@ fetch_more_data(ForeignScanState *node) res = pgfdw_exec_query(conn, sql, fsstate->conn_state); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, fsstate->query); + pgfdw_report_error(res, conn, fsstate->query); } /* Convert the data into HeapTuples */ @@ -3944,7 +3944,7 @@ close_cursor(PGconn *conn, unsigned int cursor_number, snprintf(sql, sizeof(sql), "CLOSE c%u", cursor_number); res = pgfdw_exec_query(conn, sql, conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql); + pgfdw_report_error(res, conn, sql); PQclear(res); } @@ -4152,7 +4152,7 @@ execute_foreign_modify(EState *estate, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); + pgfdw_report_error(NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. @@ -4160,7 +4160,7 @@ execute_foreign_modify(EState *estate, res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != (fmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); + pgfdw_report_error(res, fmstate->conn, fmstate->query); /* Check number of rows affected, and fetch RETURNING tuple if any */ if (fmstate->has_returning) @@ -4219,14 +4219,14 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) fmstate->query, 0, NULL)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, fmstate->query); + pgfdw_report_error(NULL, fmstate->conn, fmstate->query); /* * Get the result, and check for success. */ res = pgfdw_get_result(fmstate->conn); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, fmstate->query); + pgfdw_report_error(res, fmstate->conn, fmstate->query); PQclear(res); /* This action shows that the prepare has been done. 
*/ @@ -4373,7 +4373,7 @@ deallocate_query(PgFdwModifyState *fmstate) snprintf(sql, sizeof(sql), "DEALLOCATE %s", fmstate->p_name); res = pgfdw_exec_query(fmstate->conn, sql, fmstate->conn_state); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, sql); + pgfdw_report_error(res, fmstate->conn, sql); PQclear(res); pfree(fmstate->p_name); fmstate->p_name = NULL; @@ -4541,7 +4541,7 @@ execute_dml_stmt(ForeignScanState *node) */ if (!PQsendQueryParams(dmstate->conn, dmstate->query, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, dmstate->conn, dmstate->query); + pgfdw_report_error(NULL, dmstate->conn, dmstate->query); /* * Get the result, and check for success. @@ -4549,7 +4549,7 @@ execute_dml_stmt(ForeignScanState *node) dmstate->result = pgfdw_get_result(dmstate->conn); if (PQresultStatus(dmstate->result) != (dmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, + pgfdw_report_error(dmstate->result, dmstate->conn, dmstate->query); /* @@ -4923,7 +4923,7 @@ postgresAnalyzeForeignTable(Relation relation, res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 1) elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query"); @@ -4972,7 +4972,7 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); if (PQntuples(res) != 1 || PQnfields(res) != 2) elog(ERROR, "unexpected result from deparseAnalyzeInfoSql query"); @@ -5202,7 +5202,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, res = pgfdw_exec_query(conn, sql.data, NULL); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); PQclear(res); /* @@ -5254,7 +5254,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, res = pgfdw_exec_query(conn, fetch_sql, NULL); /* On error, report the original query, not the FETCH. */ if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, sql.data); + pgfdw_report_error(res, conn, sql.data); /* Process whatever we got. */ numrows = PQntuples(res); @@ -5426,7 +5426,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + pgfdw_report_error(res, conn, buf.data); if (PQntuples(res) != 1) ereport(ERROR, @@ -5540,7 +5540,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid) /* Fetch the data */ res = pgfdw_exec_query(conn, buf.data, NULL); if (PQresultStatus(res) != PGRES_TUPLES_OK) - pgfdw_report_error(ERROR, res, conn, buf.data); + pgfdw_report_error(res, conn, buf.data); /* Process results */ numrows = PQntuples(res); @@ -7312,7 +7312,7 @@ postgresForeignAsyncNotify(AsyncRequest *areq) /* On error, report the original query, not the FETCH. 
*/ if (!PQconsumeInput(fsstate->conn)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); + pgfdw_report_error(NULL, fsstate->conn, fsstate->query); fetch_more_data(node); @@ -7411,7 +7411,7 @@ fetch_more_data_begin(AsyncRequest *areq) fsstate->fetch_size, fsstate->cursor_number); if (!PQsendQuery(fsstate->conn, sql)) - pgfdw_report_error(ERROR, NULL, fsstate->conn, fsstate->query); + pgfdw_report_error(NULL, fsstate->conn, fsstate->query); /* Remember that the request is in process */ fsstate->conn_state->pendingAreq = areq; diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index 38e1a88594131..e69735298d78f 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -166,8 +166,10 @@ extern void do_sql_command(PGconn *conn, const char *sql); extern PGresult *pgfdw_get_result(PGconn *conn); extern PGresult *pgfdw_exec_query(PGconn *conn, const char *query, PgFdwConnState *state); -extern void pgfdw_report_error(int elevel, PGresult *res, PGconn *conn, - const char *sql); +pg_noreturn extern void pgfdw_report_error(PGresult *res, PGconn *conn, + const char *sql); +extern void pgfdw_report(int elevel, PGresult *res, PGconn *conn, + const char *sql); /* in option.c */ extern int ExtractConnectionOptions(List *defelems, From 0f3a26feddae7ae403c90742095ff4626d7e5617 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 29 Jul 2025 10:32:53 -0500 Subject: [PATCH 33/67] Add commit 1d1612aec7 to .git-blame-ignore-revs. --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index f8526d4d1a9c2..f83e2fc658664 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -14,6 +14,9 @@ # # $ git log --pretty=format:"%H # %cd%n# %s" $PGINDENTGITHASH -1 --date=iso +1d1612aec7688139e1a5506df1366b4b6a69605d # 2025-07-29 09:10:41 -0400 +# Run pgindent. + 73873805fb3627cb23937c750fa83ffd8f16fc6c # 2025-07-25 16:36:44 -0400 # Run pgindent on the changes of the previous patch. From 902f922218894dd69df1874f9f130dbbafff0499 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 12:47:19 -0400 Subject: [PATCH 34/67] Remove unnecessary complication around xmlParseBalancedChunkMemory. When I prepared 71c0921b6 et al yesterday, I was thinking that the logic involving explicitly freeing the node_list output was still needed to dodge leakage bugs in libxml2. But I was misremembering: we introduced that only because with early 2.13.x releases we could not trust xmlParseBalancedChunkMemory's result code, so we had to look to see if a node list was returned or not. There's no reason to believe that xmlParseBalancedChunkMemory will fail to clean up the node list when required, so simplify. (This essentially completes reverting all the non-cosmetic changes in 6082b3d5d.) 
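For context, a minimal C sketch of the libxml2 calling convention relied
on here, reduced to the essentials; the cleanup-on-failure behavior is
the property argued for above:

    #include <stdio.h>
    #include <libxml/parser.h>

    int
    main(void)
    {
        xmlDocPtr	doc = xmlNewDoc((const xmlChar *) "1.0");
        xmlNodePtr	nodes = NULL;
        int			res;

        /* On success (res == 0) the node list comes back through
         * "nodes"; per the reasoning above, on failure the function is
         * expected to clean the list up itself. */
        res = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
                                          (const xmlChar *) "<a>x</a><b/>",
                                          &nodes);
        printf("result code: %d\n", res);

        if (nodes != NULL)
            xmlFreeNodeList(nodes);
        xmlFreeDoc(doc);
        return 0;
    }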
Reported-by: Jim Jones Author: Tom Lane Discussion: https://postgr.es/m/997668.1753802857@sss.pgh.pa.us Backpatch-through: 13 --- src/backend/utils/adt/xml.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 3379d3922606a..182e8f75db75c 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -1922,24 +1922,16 @@ xml_parse(text *data, XmlOptionType xmloption_arg, /* allow empty content */ if (*(utf8string + count)) { - xmlNodePtr node_list = NULL; - res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, - &node_list); + parsed_nodes); if (res_code != 0 || xmlerrcxt->err_occurred) { xml_errsave(escontext, xmlerrcxt, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); - xmlFreeNodeList(node_list); goto fail; } - - if (parsed_nodes != NULL) - *parsed_nodes = node_list; - else - xmlFreeNodeList(node_list); } } From c3019bb778b99f2541779ed23402a8f825a0000b Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 29 Jul 2025 18:56:00 +0200 Subject: [PATCH 35/67] Update comment The code being referred to was moved to a different function in commit eb8312a22a8, so update the comment accordingly. --- src/backend/utils/adt/tid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 1b0df1117171a..39dab3e42df58 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -84,7 +84,7 @@ tidin(PG_FUNCTION_ARGS) /* * Cope with possibility that unsigned long is wider than BlockNumber, in * which case strtoul will not raise an error for some values that are out - * of the range of BlockNumber. (See similar code in oidin().) + * of the range of BlockNumber. (See similar code in uint32in_subr().) */ #if SIZEOF_LONG > 4 if (cvt != (unsigned long) blockNumber && From 4300d8b6a79d61abb5ca9f901df7bde7a49322b6 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2025 15:17:40 -0400 Subject: [PATCH 36/67] Don't put library-supplied -L/-I switches before user-supplied ones. For many optional libraries, we extract the -L and -l switches needed to link the library from a helper program such as llvm-config. In some cases we put the resulting -L switches into LDFLAGS ahead of -L switches specified via --with-libraries. That risks breaking the user's intention for --with-libraries. It's not such a problem if the library's -L switch points to a directory containing only that library, but on some platforms a library helper may "helpfully" offer a switch such as -L/usr/lib that points to a directory holding all standard libraries. If the user specified --with-libraries in hopes of overriding the standard build of some library, the -L/usr/lib switch prevents that from happening since it will come before the user-specified directory. To fix, avoid inserting these switches directly into LDFLAGS during configure, instead adding them to LIBDIRS or SHLIB_LINK. They will still eventually get added to LDFLAGS, but only after the switches coming from --with-libraries. The same problem exists for -I switches: those coming from --with-includes should appear before any coming from helper programs such as llvm-config. We have not heard field complaints about this case, but it seems certain that a user attempting to override a standard library could have issues. 
The changes for this go well beyond configure itself, however, because many Makefiles have occasion to manipulate CPPFLAGS to insert locally-desirable -I switches, and some of them got it wrong. The correct ordering is any -I switches pointing at within-the- source-tree-or-build-tree directories, then those from the tree-wide CPPFLAGS, then those from helper programs. There were several places that risked pulling in a system-supplied copy of libpq headers, for example, instead of the in-tree files. (Commit cb36f8ec2 fixed one instance of that a few months ago, but this exercise found more.) The Meson build scripts may or may not have any comparable problems, but I'll leave it to someone else to investigate that. Reported-by: Charles Samborski Author: Tom Lane Discussion: https://postgr.es/m/70f2155f-27ca-4534-b33d-7750e20633d7@demurgos.net Backpatch-through: 13 --- config/llvm.m4 | 4 ++-- config/programs.m4 | 4 ++-- configure | 24 ++++++++++++------------ configure.ac | 18 +++++++++--------- src/Makefile.global.in | 2 +- src/backend/jit/llvm/Makefile | 2 +- src/bin/initdb/Makefile | 2 +- src/common/Makefile | 2 +- src/interfaces/libpq-oauth/Makefile | 2 +- src/interfaces/libpq/Makefile | 2 +- src/pl/plpython/Makefile | 2 +- src/pl/tcl/Makefile | 2 +- 12 files changed, 33 insertions(+), 33 deletions(-) diff --git a/config/llvm.m4 b/config/llvm.m4 index fa4bedd9370fc..9d6fe8199e364 100644 --- a/config/llvm.m4 +++ b/config/llvm.m4 @@ -4,7 +4,7 @@ # ----------------- # # Look for the LLVM installation, check that it's new enough, set the -# corresponding LLVM_{CFLAGS,CXXFLAGS,BINPATH} and LDFLAGS +# corresponding LLVM_{CFLAGS,CXXFLAGS,BINPATH,LIBS} # variables. Also verify that CLANG is available, to transform C # into bitcode. # @@ -55,7 +55,7 @@ AC_DEFUN([PGAC_LLVM_SUPPORT], for pgac_option in `$LLVM_CONFIG --ldflags`; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LLVM_LIBS="$LLVM_LIBS $pgac_option";; esac done diff --git a/config/programs.m4 b/config/programs.m4 index c73d9307ea8a9..e57fe4907b844 100644 --- a/config/programs.m4 +++ b/config/programs.m4 @@ -290,8 +290,8 @@ AC_DEFUN([PGAC_CHECK_LIBCURL], pgac_save_LDFLAGS=$LDFLAGS pgac_save_LIBS=$LIBS - CPPFLAGS="$LIBCURL_CPPFLAGS $CPPFLAGS" - LDFLAGS="$LIBCURL_LDFLAGS $LDFLAGS" + CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" + LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS" AC_CHECK_HEADER(curl/curl.h, [], [AC_MSG_ERROR([header file is required for --with-libcurl])]) diff --git a/configure b/configure index 6d7c22e153fea..8a535da6b7a98 100755 --- a/configure +++ b/configure @@ -5194,7 +5194,7 @@ fi for pgac_option in `$LLVM_CONFIG --ldflags`; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LLVM_LIBS="$LLVM_LIBS $pgac_option";; esac done @@ -9436,12 +9436,12 @@ fi # Note the user could also set XML2_CFLAGS/XML2_LIBS directly for pgac_option in $XML2_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $XML2_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -9666,12 +9666,12 @@ fi # note that -llz4 will be added by AC_CHECK_LIB below. 
for pgac_option in $LZ4_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $LZ4_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -9807,12 +9807,12 @@ fi # note that -lzstd will be added by AC_CHECK_LIB below. for pgac_option in $ZSTD_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $ZSTD_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -12723,8 +12723,8 @@ if test "$with_libcurl" = yes ; then pgac_save_LDFLAGS=$LDFLAGS pgac_save_LIBS=$LIBS - CPPFLAGS="$LIBCURL_CPPFLAGS $CPPFLAGS" - LDFLAGS="$LIBCURL_LDFLAGS $LDFLAGS" + CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" + LDFLAGS="$LDFLAGS $LIBCURL_LDFLAGS" ac_fn_c_check_header_mongrel "$LINENO" "curl/curl.h" "ac_cv_header_curl_curl_h" "$ac_includes_default" if test "x$ac_cv_header_curl_curl_h" = xyes; then : @@ -16658,7 +16658,7 @@ fi if test "$with_icu" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$ICU_CFLAGS $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $ICU_CFLAGS" # Verify we have ICU's header files ac_fn_c_check_header_mongrel "$LINENO" "unicode/ucol.h" "ac_cv_header_unicode_ucol_h" "$ac_includes_default" @@ -18876,7 +18876,7 @@ Use --without-tcl to disable building PL/Tcl." "$LINENO" 5 fi # now that we have TCL_INCLUDE_SPEC, we can check for ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$TCL_INCLUDE_SPEC $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $TCL_INCLUDE_SPEC" ac_fn_c_check_header_mongrel "$LINENO" "tcl.h" "ac_cv_header_tcl_h" "$ac_includes_default" if test "x$ac_cv_header_tcl_h" = xyes; then : @@ -18945,7 +18945,7 @@ fi # check for if test "$with_python" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$python_includespec $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $python_includespec" ac_fn_c_check_header_mongrel "$LINENO" "Python.h" "ac_cv_header_Python_h" "$ac_includes_default" if test "x$ac_cv_header_Python_h" = xyes; then : diff --git a/configure.ac b/configure.ac index c2877e369350e..e72201e679b56 100644 --- a/configure.ac +++ b/configure.ac @@ -1103,12 +1103,12 @@ if test "$with_libxml" = yes ; then # Note the user could also set XML2_CFLAGS/XML2_LIBS directly for pgac_option in $XML2_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $XML2_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -1152,12 +1152,12 @@ if test "$with_lz4" = yes; then # note that -llz4 will be added by AC_CHECK_LIB below. for pgac_option in $LZ4_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $LZ4_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -1177,12 +1177,12 @@ if test "$with_zstd" = yes; then # note that -lzstd will be added by AC_CHECK_LIB below. 
for pgac_option in $ZSTD_CFLAGS; do case $pgac_option in - -I*|-D*) CPPFLAGS="$CPPFLAGS $pgac_option";; + -I*|-D*) INCLUDES="$INCLUDES $pgac_option";; esac done for pgac_option in $ZSTD_LIBS; do case $pgac_option in - -L*) LDFLAGS="$LDFLAGS $pgac_option";; + -L*) LIBDIRS="$LIBDIRS $pgac_option";; esac done fi @@ -1944,7 +1944,7 @@ fi if test "$with_icu" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$ICU_CFLAGS $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $ICU_CFLAGS" # Verify we have ICU's header files AC_CHECK_HEADER(unicode/ucol.h, [], @@ -2344,7 +2344,7 @@ Use --without-tcl to disable building PL/Tcl.]) fi # now that we have TCL_INCLUDE_SPEC, we can check for ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$TCL_INCLUDE_SPEC $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $TCL_INCLUDE_SPEC" AC_CHECK_HEADER(tcl.h, [], [AC_MSG_ERROR([header file is required for Tcl])]) CPPFLAGS=$ac_save_CPPFLAGS fi @@ -2381,7 +2381,7 @@ fi # check for if test "$with_python" = yes; then ac_save_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$python_includespec $CPPFLAGS" + CPPFLAGS="$CPPFLAGS $python_includespec" AC_CHECK_HEADER(Python.h, [], [AC_MSG_ERROR([header file is required for Python])]) CPPFLAGS=$ac_save_CPPFLAGS fi diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 04952b533ded9..8b1b357beaa04 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -254,7 +254,7 @@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ PG_SYSROOT = @PG_SYSROOT@ -override CPPFLAGS := $(ICU_CFLAGS) $(LIBNUMA_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS) +override CPPFLAGS += $(ICU_CFLAGS) $(LIBNUMA_CFLAGS) $(LIBURING_CFLAGS) ifdef PGXS override CPPFLAGS := -I$(includedir_server) -I$(includedir_internal) $(CPPFLAGS) diff --git a/src/backend/jit/llvm/Makefile b/src/backend/jit/llvm/Makefile index e8c12060b93df..68677ba42e189 100644 --- a/src/backend/jit/llvm/Makefile +++ b/src/backend/jit/llvm/Makefile @@ -31,7 +31,7 @@ endif # All files in this directory use LLVM. CFLAGS += $(LLVM_CFLAGS) CXXFLAGS += $(LLVM_CXXFLAGS) -override CPPFLAGS := $(LLVM_CPPFLAGS) $(CPPFLAGS) +override CPPFLAGS += $(LLVM_CPPFLAGS) SHLIB_LINK += $(LLVM_LIBS) # Because this module includes C++ files, we need to use a C++ diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index 997e0a013e956..c0470efda92a3 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -20,7 +20,7 @@ include $(top_builddir)/src/Makefile.global # from libpq, else we have risks of version skew if we run with a libpq # shared library from a different PG version. Define # USE_PRIVATE_ENCODING_FUNCS to ensure that that happens. -override CPPFLAGS := -DUSE_PRIVATE_ENCODING_FUNCS -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS) +override CPPFLAGS := -DUSE_PRIVATE_ENCODING_FUNCS -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS) $(ICU_CFLAGS) # We need libpq only because fe_utils does. LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS) diff --git a/src/common/Makefile b/src/common/Makefile index 1e2b91c83c4c4..2c720caa50972 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -163,7 +163,7 @@ libpgcommon_shlib.a: $(OBJS_SHLIB) # The JSON API normally exits on out-of-memory; disable that behavior for shared # library builds. This requires libpq's pqexpbuffer.h. 
jsonapi_shlib.o: override CPPFLAGS += -DJSONAPI_USE_PQEXPBUFFER -jsonapi_shlib.o: override CPPFLAGS += -I$(libpq_srcdir) +jsonapi_shlib.o: override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) # Because this uses its own compilation rule, it doesn't use the # dependency tracking logic from Makefile.global. To make sure that diff --git a/src/interfaces/libpq-oauth/Makefile b/src/interfaces/libpq-oauth/Makefile index 270fc0cf2d9d9..682f17413b3a4 100644 --- a/src/interfaces/libpq-oauth/Makefile +++ b/src/interfaces/libpq-oauth/Makefile @@ -24,7 +24,7 @@ NAME = pq-oauth-$(MAJORVERSION) override shlib := lib$(NAME)$(DLSUFFIX) override stlib := libpq-oauth.a -override CPPFLAGS := -I$(libpq_srcdir) -I$(top_builddir)/src/port $(LIBCURL_CPPFLAGS) $(CPPFLAGS) +override CPPFLAGS := -I$(libpq_srcdir) -I$(top_builddir)/src/port $(CPPFLAGS) $(LIBCURL_CPPFLAGS) OBJS = \ $(WIN32RES) diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index 47d6781150944..da6650066d46e 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -24,7 +24,7 @@ NAME= pq SO_MAJOR_VERSION= 5 SO_MINOR_VERSION= $(MAJORVERSION) -override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port +override CPPFLAGS := -I$(srcdir) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port $(CPPFLAGS) ifneq ($(PORTNAME), win32) override CFLAGS += $(PTHREAD_CFLAGS) endif diff --git a/src/pl/plpython/Makefile b/src/pl/plpython/Makefile index f959083a0bdec..25f295c3709e2 100644 --- a/src/pl/plpython/Makefile +++ b/src/pl/plpython/Makefile @@ -11,7 +11,7 @@ ifeq ($(PORTNAME), win32) override python_libspec = endif -override CPPFLAGS := -I. -I$(srcdir) $(python_includespec) $(CPPFLAGS) +override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS) $(python_includespec) rpathdir = $(python_libdir) diff --git a/src/pl/tcl/Makefile b/src/pl/tcl/Makefile index ea52a2efc229d..dd57f7d694c82 100644 --- a/src/pl/tcl/Makefile +++ b/src/pl/tcl/Makefile @@ -11,7 +11,7 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -override CPPFLAGS := -I. -I$(srcdir) $(TCL_INCLUDE_SPEC) $(CPPFLAGS) +override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS) $(TCL_INCLUDE_SPEC) # On Windows, we don't link directly with the Tcl library; see below ifneq ($(PORTNAME), win32) From 613f64712257d4b94e068e77fb0593e0a71d8df1 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 30 Jul 2025 00:39:49 +0300 Subject: [PATCH 37/67] Handle cancel requests with PID 0 gracefully If the client sent a query cancel request with backend PID 0, it tripped an assertion. With assertions disabled, you got this in the log instead: LOG: invalid cancel request with PID 0 LOG: wrong key in cancel request for process 0 Query cancellations don't even require authentication, so we better tolerate bogus requests. Fix by turning the assertion into a regular runtime check. Spotted while testing libpq behavior with a modified server that didn't send BackendKeyData to the client. 
Backpatch-through: 18
---
 src/backend/storage/ipc/procsignal.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index a9bb540b55ac2..087821311cceb 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -728,7 +728,11 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 void
 SendCancelRequest(int backendPID, const uint8 *cancel_key, int cancel_key_len)
 {
-	Assert(backendPID != 0);
+	if (backendPID == 0)
+	{
+		ereport(LOG, (errmsg("invalid cancel request with PID 0")));
+		return;
+	}
 
 	/*
 	 * See if we have a matching backend.  Reading the pss_pid and

From 1a5212775e46fd573e74c9213392177920f0efd6 Mon Sep 17 00:00:00 2001
From: Michael Paquier
Date: Wed, 30 Jul 2025 11:55:42 +0900
Subject: [PATCH 38/67] Fix ./configure checks with __cpuidex() and __cpuid()

The configure checks used two incorrect functions when checking for the
presence of some routines in an environment:
- __get_cpuidex() for the check of __cpuidex().
- __get_cpuid() for the check of __cpuid().

This means that Postgres has never been able to detect the presence of
these functions, impacting environments where these exist, like
Windows.

Simply fixing the function name does not work.  For example, using
configure with MinGW on Windows causes the checks to detect all four of
__get_cpuid(), __get_cpuid_count(), __cpuidex() and __cpuid() to be
available, causing a compilation failure, as this messes up the MinGW
headers: we would include both <cpuid.h> and <intrin.h>.

The Postgres code expects only one in { __get_cpuid() , __cpuid() }
and one in { __get_cpuid_count() , __cpuidex() } to exist.  This commit
reshapes the configure checks to do exactly what meson is doing, which
has been working well for us: check one, then the other, but never
allow both to be detected in a given build.

The logic has been wrong since 3dc2d62d0486 and 792752af4eb5, where
these checks were introduced (the second case is most likely a
copy-pasto coming from the first case), with meson documenting that the
configure checks were broken.  As far as I can see, they are not, once
applied consistently with what the code expects, but let's see if the
buildfarm has something different to say.

The comment in meson.build is adjusted as well, to reflect the new
reality.

Author: Lukas Fittl
Co-authored-by: Michael Paquier
Discussion: https://postgr.es/m/aIgwNYGVt5aRAqTJ@paquier.xyz
Backpatch-through: 13
---
 configure    | 63 +++++++++++++++++++++++++++-------------------------
 configure.ac | 49 +++++++++++++++++++++------------------------
 meson.build  |  5 +----
 3 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/configure b/configure
index 8a535da6b7a98..507a2437c3308 100755
--- a/configure
+++ b/configure
@@ -17565,7 +17565,7 @@ $as_echo "#define HAVE_GCC__ATOMIC_INT64_CAS 1" >>confdefs.h
 fi
 
-# Check for x86 cpuid instruction
+# Check for __get_cpuid() and __cpuid()
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid" >&5
 $as_echo_n "checking for __get_cpuid... " >&6; }
 if ${pgac_cv__get_cpuid+:} false; then :
@@ -17598,77 +17598,79 @@ if test x"$pgac_cv__get_cpuid" = x"yes"; then
 
 $as_echo "#define HAVE__GET_CPUID 1" >>confdefs.h
 
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid_count" >&5
-$as_echo_n "checking for __get_cpuid_count... " >&6; }
-if ${pgac_cv__get_cpuid_count+:} false; then :
+else
+  # __cpuid()
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5
+$as_echo_n "checking for __cpuid... 
" >&6; } +if ${pgac_cv__cpuid+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include +#include int main () { unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); + __cpuid(exx, 1); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : - pgac_cv__get_cpuid_count="yes" + pgac_cv__cpuid="yes" else - pgac_cv__get_cpuid_count="no" + pgac_cv__cpuid="no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid_count" >&5 -$as_echo "$pgac_cv__get_cpuid_count" >&6; } -if test x"$pgac_cv__get_cpuid_count" = x"yes"; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5 +$as_echo "$pgac_cv__cpuid" >&6; } + if test x"$pgac_cv__cpuid" = x"yes"; then -$as_echo "#define HAVE__GET_CPUID_COUNT 1" >>confdefs.h +$as_echo "#define HAVE__CPUID 1" >>confdefs.h + fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5 -$as_echo_n "checking for __cpuid... " >&6; } -if ${pgac_cv__cpuid+:} false; then : +# Check for __get_cpuid_count() and __cpuidex() in a similar fashion. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid_count" >&5 +$as_echo_n "checking for __get_cpuid_count... " >&6; } +if ${pgac_cv__get_cpuid_count+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include +#include int main () { unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuid(exx[0], 1); + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : - pgac_cv__cpuid="yes" + pgac_cv__get_cpuid_count="yes" else - pgac_cv__cpuid="no" + pgac_cv__get_cpuid_count="no" fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5 -$as_echo "$pgac_cv__cpuid" >&6; } -if test x"$pgac_cv__cpuid" = x"yes"; then - -$as_echo "#define HAVE__CPUID 1" >>confdefs.h +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid_count" >&5 +$as_echo "$pgac_cv__get_cpuid_count" >&6; } +if test x"$pgac_cv__get_cpuid_count" = x"yes"; then -fi +$as_echo "#define HAVE__GET_CPUID_COUNT 1" >>confdefs.h -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuidex" >&5 +else + # __cpuidex() + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuidex" >&5 $as_echo_n "checking for __cpuidex... 
" >&6; } if ${pgac_cv__cpuidex+:} false; then : $as_echo_n "(cached) " >&6 @@ -17680,7 +17682,7 @@ int main () { unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuidex(exx[0], 7, 0); + __cpuidex(exx, 7, 0); ; return 0; @@ -17696,10 +17698,11 @@ rm -f core conftest.err conftest.$ac_objext \ fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuidex" >&5 $as_echo "$pgac_cv__cpuidex" >&6; } -if test x"$pgac_cv__cpuidex" = x"yes"; then + if test x"$pgac_cv__cpuidex" = x"yes"; then $as_echo "#define HAVE__CPUIDEX 1" >>confdefs.h + fi fi # Check for XSAVE intrinsics diff --git a/configure.ac b/configure.ac index e72201e679b56..5f4548adc5cd1 100644 --- a/configure.ac +++ b/configure.ac @@ -2044,7 +2044,7 @@ PGAC_HAVE_GCC__ATOMIC_INT32_CAS PGAC_HAVE_GCC__ATOMIC_INT64_CAS -# Check for x86 cpuid instruction +# Check for __get_cpuid() and __cpuid() AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid], [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], [[unsigned int exx[4] = {0, 0, 0, 0}; @@ -2054,8 +2054,21 @@ AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid], [pgac_cv__get_cpuid="no"])]) if test x"$pgac_cv__get_cpuid" = x"yes"; then AC_DEFINE(HAVE__GET_CPUID, 1, [Define to 1 if you have __get_cpuid.]) +else + # __cpuid() + AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [[unsigned int exx[4] = {0, 0, 0, 0}; + __cpuid(exx, 1); + ]])], + [pgac_cv__cpuid="yes"], + [pgac_cv__cpuid="no"])]) + if test x"$pgac_cv__cpuid" = x"yes"; then + AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.]) + fi fi +# Check for __get_cpuid_count() and __cpuidex() in a similar fashion. AC_CACHE_CHECK([for __get_cpuid_count], [pgac_cv__get_cpuid_count], [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], [[unsigned int exx[4] = {0, 0, 0, 0}; @@ -2065,28 +2078,18 @@ AC_CACHE_CHECK([for __get_cpuid_count], [pgac_cv__get_cpuid_count], [pgac_cv__get_cpuid_count="no"])]) if test x"$pgac_cv__get_cpuid_count" = x"yes"; then AC_DEFINE(HAVE__GET_CPUID_COUNT, 1, [Define to 1 if you have __get_cpuid_count.]) -fi - -AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid], -[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], - [[unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuid(exx[0], 1); - ]])], - [pgac_cv__cpuid="yes"], - [pgac_cv__cpuid="no"])]) -if test x"$pgac_cv__cpuid" = x"yes"; then - AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.]) -fi - -AC_CACHE_CHECK([for __cpuidex], [pgac_cv__cpuidex], -[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], - [[unsigned int exx[4] = {0, 0, 0, 0}; - __get_cpuidex(exx[0], 7, 0); - ]])], - [pgac_cv__cpuidex="yes"], - [pgac_cv__cpuidex="no"])]) -if test x"$pgac_cv__cpuidex" = x"yes"; then - AC_DEFINE(HAVE__CPUIDEX, 1, [Define to 1 if you have __cpuidex.]) +else + # __cpuidex() + AC_CACHE_CHECK([for __cpuidex], [pgac_cv__cpuidex], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], + [[unsigned int exx[4] = {0, 0, 0, 0}; + __cpuidex(exx, 7, 0); + ]])], + [pgac_cv__cpuidex="yes"], + [pgac_cv__cpuidex="no"])]) + if test x"$pgac_cv__cpuidex" = x"yes"; then + AC_DEFINE(HAVE__CPUIDEX, 1, [Define to 1 if you have __cpuidex.]) + fi fi # Check for XSAVE intrinsics diff --git a/meson.build b/meson.build index 5365aaf95e64b..ca423dc8e12f3 100644 --- a/meson.build +++ b/meson.build @@ -1996,10 +1996,7 @@ if cc.links(''' cdata.set('HAVE__BUILTIN_OP_OVERFLOW', 1) endif - -# XXX: The configure.ac check for __cpuid() is broken, we don't copy that -# here. To prevent problems due to two detection methods working, stop -# checking after one. 
From 00c977177956c4b4d12f8c6518d4269b086deca8 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 30 Jul 2025 09:51:45 +0200 Subject: [PATCH 39/67] Fix whitespace --- doc/src/sgml/pageinspect.sgml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml index 1292933366555..f5014787c783b 100644 --- a/doc/src/sgml/pageinspect.sgml +++ b/doc/src/sgml/pageinspect.sgml @@ -741,7 +741,7 @@ test=# SELECT first_tid, nbytes, tids[0:5] AS some_tids For example: test=# SELECT * FROM gist_page_opaque_info(get_raw_page('test_gist_idx', 2)); - lsn | nsn | rightlink | flags + lsn | nsn | rightlink | flags ------------+------------+-----------+-------- 0/0B5FE088 | 0/00000000 | 1 | {leaf} (1 row) From ce9a6244b5b4ce1df71611512a757353803404a5 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Wed, 30 Jul 2025 11:04:05 -0400 Subject: [PATCH 40/67] Revert Non text modes for pg_dumpall, and pg_restore support Recent discussions of the mechanisms used to manage global data have raised concerns about their robustness and security. Rather than try to deal with those concerns at a very late stage of the release cycle, the conclusion is to revert these features and work on them for the next release. This reverts parts or all of the following commits: 1495eff7bdb Non text modes for pg_dumpall, correspondingly change pg_restore 5db3bf7391d Clean up from commit 1495eff7bdb 289f74d0cb2 Add more TAP tests for pg_dumpall 2ef57908067 Fix a couple of error messages and tests for them b52a4a5f285 Clean up error messages from 1495eff7bdb 4170298b6ec Further cleanup for directory creation on pg_dump/pg_dumpall 22cb6d28950 Fix memory leak in pg_restore.c 928394b664b Improve various new-to-v18 appendStringInfo calls 39729ec01d2 Fix fat fingering in 22cb6d28950 5822bf21d50 Add missing space in pg_restore documentation. f09088a01d3 Free memory properly in pg_restore.c 40b9c27014d pg_restore cleanups 4aad2cb7707 Portability fix: isdigit() must be passed an unsigned char. 88e947136b4 Fix typos and grammar in the code f60420cff66 doc: Alphabetize long options for pg_dump[all]. 
bc35adee8d7 doc: Put new options in consistent order on man pages a876464abc7 Message style improvements dec6643487b Improve pg_dump/pg_dumpall help synopses and terminology 0ebd2425558 Run pgperltidy Discussion: https://postgr.es/m/20250708212819.09.nmisch@google.com Backpatch-to: 18 Reviewed-by: Noah Misch --- doc/src/sgml/ref/pg_dumpall.sgml | 89 +-- doc/src/sgml/ref/pg_restore.sgml | 66 +-- src/bin/pg_dump/meson.build | 1 - src/bin/pg_dump/parallel.c | 10 - src/bin/pg_dump/pg_backup.h | 2 +- src/bin/pg_dump/pg_backup_archiver.c | 20 +- src/bin/pg_dump/pg_backup_archiver.h | 1 - src/bin/pg_dump/pg_backup_tar.c | 2 +- src/bin/pg_dump/pg_dump.c | 2 +- src/bin/pg_dump/pg_dumpall.c | 238 ++------- src/bin/pg_dump/pg_restore.c | 772 +-------------------------- src/bin/pg_dump/t/001_basic.pl | 22 - src/bin/pg_dump/t/006_pg_dumpall.pl | 400 -------------- 13 files changed, 85 insertions(+), 1540 deletions(-) delete mode 100644 src/bin/pg_dump/t/006_pg_dumpall.pl diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index 8ca68da5a5560..f4cbc8288e3ad 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -16,10 +16,7 @@ PostgreSQL documentation pg_dumpall - - - export a PostgreSQL database cluster as an SQL script or to other formats - + extract a PostgreSQL database cluster into a script file @@ -36,7 +33,7 @@ PostgreSQL documentation pg_dumpall is a utility for writing out (dumping) all PostgreSQL databases - of a cluster into an SQL script file or an archive. The output contains + of a cluster into one script file. The script file contains SQL commands that can be used as input to to restore the databases. It does this by calling for each database in the cluster. @@ -55,16 +52,11 @@ PostgreSQL documentation - Plain text SQL scripts will be written to the standard output. Use the + The SQL script will be written to the standard output. Use the / option or shell operators to redirect it into a file. - - Archives in other formats will be placed in a directory named using the - /, which is required in this case. - - pg_dumpall needs to connect several times to the PostgreSQL server (once per @@ -129,85 +121,10 @@ PostgreSQL documentation Send output to the specified file. If this is omitted, the standard output is used. - Note: This option can only be omitted when is plain - - - - - - Specify the format of dump files. In plain format, all the dump data is - sent in a single text stream. This is the default. - - In all other modes, pg_dumpall first creates two files: - global.dat and map.dat, in the directory - specified by . - The first file contains global data, such as roles and tablespaces. The second - contains a mapping between database oids and names. These files are used by - pg_restore. Data for individual databases is placed in - databases subdirectory, named using the database's oid. - - - - d - directory - - - Output directory-format archives for each database, - suitable for input into pg_restore. The directory - will have database oid as its name. - - - - - - p - plain - - - Output a plain-text SQL script file (the default). - - - - - - c - custom - - - Output a custom-format archive for each database, - suitable for input into pg_restore. The archive - will be named dboid.dmp where dboid is the - oid of the database. - - - - - - t - tar - - - Output a tar-format archive for each database, - suitable for input into pg_restore. The archive - will be named dboid.tar where dboid is the - oid of the database. 
- - - - - - - Note: see for details - of how the various non plain text archives work. - - - - - diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index b649bd3a5ae0f..2abe05d47e936 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -18,9 +18,8 @@ PostgreSQL documentation pg_restore - restore PostgreSQL databases from archives - created by pg_dump or - pg_dumpall + restore a PostgreSQL database from an + archive file created by pg_dump @@ -39,14 +38,13 @@ PostgreSQL documentation pg_restore is a utility for restoring a - PostgreSQL database or cluster from an archive - created by or - in one of the non-plain-text + PostgreSQL database from an archive + created by in one of the non-plain-text formats. It will issue the commands necessary to reconstruct the - database or cluster to the state it was in at the time it was saved. The - archives also allow pg_restore to + database to the state it was in at the time it was saved. The + archive files also allow pg_restore to be selective about what is restored, or even to reorder the items - prior to being restored. The archive formats are designed to be + prior to being restored. The archive files are designed to be portable across architectures. @@ -54,17 +52,10 @@ PostgreSQL documentation pg_restore can operate in two modes. If a database name is specified, pg_restore connects to that database and restores archive contents directly into - the database. - When restoring from a dump made by pg_dumpall, - each database will be created and then the restoration will be run in that - database. - - Otherwise, when a database name is not specified, a script containing the SQL - commands necessary to rebuild the database or cluster is created and written + the database. Otherwise, a script containing the SQL + commands necessary to rebuild the database is created and written to a file or standard output. This script output is equivalent to - the plain text output format of pg_dump or - pg_dumpall. - + the plain text output format of pg_dump. Some of the options controlling the output are therefore analogous to pg_dump options. @@ -149,8 +140,6 @@ PostgreSQL documentation commands that mention this database. Access privileges for the database itself are also restored, unless is specified. - is required when restoring multiple databases - from an archive created by pg_dumpall. @@ -246,19 +235,6 @@ PostgreSQL documentation - - - - - - Restore only global objects (roles and tablespaces), no databases. - - - This option is only relevant when restoring from an archive made using pg_dumpall. - - - - @@ -603,28 +579,6 @@ PostgreSQL documentation - - - - - Do not restore databases whose name matches - pattern. - Multiple patterns can be excluded by writing multiple - switches. The - pattern parameter is - interpreted as a pattern according to the same rules used by - psql's \d - commands (see ), - so multiple databases can also be excluded by writing wildcard - characters in the pattern. When using wildcards, be careful to - quote the pattern if needed to prevent shell wildcard expansion. - - - This option is only relevant when restoring from an archive made using pg_dumpall. 
- - - - diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build index 4a4ebbd8ec94f..a2233b0a1b431 100644 --- a/src/bin/pg_dump/meson.build +++ b/src/bin/pg_dump/meson.build @@ -102,7 +102,6 @@ tests += { 't/003_pg_dump_with_server.pl', 't/004_pg_dump_parallel.pl', 't/005_pg_dump_filterfile.pl', - 't/006_pg_dumpall.pl', 't/010_dump_connstr.pl', ], }, diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c index 5974d6706fd57..086adcdc50295 100644 --- a/src/bin/pg_dump/parallel.c +++ b/src/bin/pg_dump/parallel.c @@ -333,16 +333,6 @@ on_exit_close_archive(Archive *AHX) on_exit_nicely(archive_close_connection, &shutdown_info); } -/* - * When pg_restore restores multiple databases, then update already added entry - * into array for cleanup. - */ -void -replace_on_exit_close_archive(Archive *AHX) -{ - shutdown_info.AHX = AHX; -} - /* * on_exit_nicely handler for shutting down database connections and * worker processes cleanly. diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index af0007fb6d2f1..4ebef1e864451 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -308,7 +308,7 @@ extern void SetArchiveOptions(Archive *AH, DumpOptions *dopt, RestoreOptions *ro extern void ProcessArchiveRestoreOptions(Archive *AHX); -extern void RestoreArchive(Archive *AHX, bool append_data); +extern void RestoreArchive(Archive *AHX); /* Open an existing archive */ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 30e0da31aa340..dce88f040ace3 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -87,7 +87,7 @@ static int RestoringToDB(ArchiveHandle *AH); static void dump_lo_buf(ArchiveHandle *AH); static void dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim); static void SetOutput(ArchiveHandle *AH, const char *filename, - const pg_compress_specification compression_spec, bool append_data); + const pg_compress_specification compression_spec); static CompressFileHandle *SaveOutput(ArchiveHandle *AH); static void RestoreOutput(ArchiveHandle *AH, CompressFileHandle *savedOutput); @@ -339,14 +339,9 @@ ProcessArchiveRestoreOptions(Archive *AHX) StrictNamesCheck(ropt); } -/* - * RestoreArchive - * - * If append_data is set, then append data into file as we are restoring dump - * of multiple databases which was taken by pg_dumpall. - */ +/* Public */ void -RestoreArchive(Archive *AHX, bool append_data) +RestoreArchive(Archive *AHX) { ArchiveHandle *AH = (ArchiveHandle *) AHX; RestoreOptions *ropt = AH->public.ropt; @@ -463,7 +458,7 @@ RestoreArchive(Archive *AHX, bool append_data) */ sav = SaveOutput(AH); if (ropt->filename || ropt->compression_spec.algorithm != PG_COMPRESSION_NONE) - SetOutput(AH, ropt->filename, ropt->compression_spec, append_data); + SetOutput(AH, ropt->filename, ropt->compression_spec); ahprintf(AH, "--\n-- PostgreSQL database dump\n--\n\n"); @@ -1302,7 +1297,7 @@ PrintTOCSummary(Archive *AHX) sav = SaveOutput(AH); if (ropt->filename) - SetOutput(AH, ropt->filename, out_compression_spec, false); + SetOutput(AH, ropt->filename, out_compression_spec); if (strftime(stamp_str, sizeof(stamp_str), PGDUMP_STRFTIME_FMT, localtime(&AH->createDate)) == 0) @@ -1681,8 +1676,7 @@ archprintf(Archive *AH, const char *fmt,...) 
static void SetOutput(ArchiveHandle *AH, const char *filename, - const pg_compress_specification compression_spec, - bool append_data) + const pg_compress_specification compression_spec) { CompressFileHandle *CFH; const char *mode; @@ -1702,7 +1696,7 @@ SetOutput(ArchiveHandle *AH, const char *filename, else fn = fileno(stdout); - if (append_data || AH->mode == archModeAppend) + if (AH->mode == archModeAppend) mode = PG_BINARY_A; else mode = PG_BINARY_W; diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 365073b3eae45..325b53fc9bd4b 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -394,7 +394,6 @@ struct _tocEntry extern int parallel_restore(ArchiveHandle *AH, TocEntry *te); extern void on_exit_close_archive(Archive *AHX); -extern void replace_on_exit_close_archive(Archive *AHX); extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3); diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index d94d0de2a5d17..b5ba3b46dd999 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -826,7 +826,7 @@ _CloseArchive(ArchiveHandle *AH) savVerbose = AH->public.verbose; AH->public.verbose = 0; - RestoreArchive((Archive *) AH, false); + RestoreArchive((Archive *) AH); SetArchiveOptions((Archive *) AH, savDopt, savRopt); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 6298edb26b5df..1da6bd7d9726c 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -1265,7 +1265,7 @@ main(int argc, char **argv) * right now. */ if (plainText) - RestoreArchive(fout, false); + RestoreArchive(fout); CloseArchive(fout); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 100317b1aa949..87d10df07c411 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -65,10 +65,9 @@ static void dropTablespaces(PGconn *conn); static void dumpTablespaces(PGconn *conn); static void dropDBs(PGconn *conn); static void dumpUserConfig(PGconn *conn, const char *username); -static void dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat); +static void dumpDatabases(PGconn *conn); static void dumpTimestamp(const char *msg); -static int runPgDump(const char *dbname, const char *create_opts, - char *dbfile, ArchiveFormat archDumpFormat); +static int runPgDump(const char *dbname, const char *create_opts); static void buildShSecLabels(PGconn *conn, const char *catalog_name, Oid objectId, const char *objtype, const char *objname, @@ -77,7 +76,6 @@ static void executeCommand(PGconn *conn, const char *query); static void expand_dbname_patterns(PGconn *conn, SimpleStringList *patterns, SimpleStringList *names); static void read_dumpall_filters(const char *filename, SimpleStringList *pattern); -static ArchiveFormat parseDumpFormat(const char *format); static char pg_dump_bin[MAXPGPATH]; static PQExpBuffer pgdumpopts; @@ -150,7 +148,6 @@ main(int argc, char *argv[]) {"password", no_argument, NULL, 'W'}, {"no-privileges", no_argument, NULL, 'x'}, {"no-acl", no_argument, NULL, 'x'}, - {"format", required_argument, NULL, 'F'}, /* * the following options don't have an equivalent short option letter @@ -201,8 +198,6 @@ main(int argc, char *argv[]) char *pgdb = NULL; char *use_role = NULL; const char *dumpencoding = NULL; - ArchiveFormat archDumpFormat = archNull; - const char *formatName = "p"; trivalue prompt_password = TRI_DEFAULT; bool data_only = false; bool globals_only 
= false; @@ -252,7 +247,7 @@ main(int argc, char *argv[]) pgdumpopts = createPQExpBuffer(); - while ((c = getopt_long(argc, argv, "acd:E:f:F:gh:l:Op:rsS:tU:vwWx", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "acd:E:f:gh:l:Op:rsS:tU:vwWx", long_options, &optindex)) != -1) { switch (c) { @@ -280,9 +275,7 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " -f "); appendShellString(pgdumpopts, filename); break; - case 'F': - formatName = pg_strdup(optarg); - break; + case 'g': globals_only = true; break; @@ -431,21 +424,6 @@ main(int argc, char *argv[]) exit_nicely(1); } - /* Get format for dump. */ - archDumpFormat = parseDumpFormat(formatName); - - /* - * If a non-plain format is specified, a file name is also required as the - * path to the main directory. - */ - if (archDumpFormat != archNull && - (!filename || strcmp(filename, "") == 0)) - { - pg_log_error("option -F/--format=d|c|t requires option -f/--file"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - exit_nicely(1); - } - /* * If password values are not required in the dump, switch to using * pg_roles which is equally useful, just more likely to have unrestricted @@ -510,33 +488,6 @@ main(int argc, char *argv[]) if (sequence_data) appendPQExpBufferStr(pgdumpopts, " --sequence-data"); - /* - * Open the output file if required, otherwise use stdout. If required, - * then create new directory and global.dat file. - */ - if (archDumpFormat != archNull) - { - char global_path[MAXPGPATH]; - - /* Create new directory or accept the empty existing directory. */ - create_or_open_dir(filename); - - snprintf(global_path, MAXPGPATH, "%s/global.dat", filename); - - OPF = fopen(global_path, PG_BINARY_W); - if (!OPF) - pg_fatal("could not open file \"%s\": %m", global_path); - } - else if (filename) - { - OPF = fopen(filename, PG_BINARY_W); - if (!OPF) - pg_fatal("could not open output file \"%s\": %m", - filename); - } - else - OPF = stdout; - /* * If there was a database specified on the command line, use that, * otherwise try to connect to database "postgres", and failing that @@ -576,6 +527,19 @@ main(int argc, char *argv[]) expand_dbname_patterns(conn, &database_exclude_patterns, &database_exclude_names); + /* + * Open the output file if required, otherwise use stdout + */ + if (filename) + { + OPF = fopen(filename, PG_BINARY_W); + if (!OPF) + pg_fatal("could not open output file \"%s\": %m", + filename); + } + else + OPF = stdout; + /* * Set the client encoding if requested. 
*/ @@ -675,7 +639,7 @@ main(int argc, char *argv[]) } if (!globals_only && !roles_only && !tablespaces_only) - dumpDatabases(conn, archDumpFormat); + dumpDatabases(conn); PQfinish(conn); @@ -688,7 +652,7 @@ main(int argc, char *argv[]) fclose(OPF); /* sync the resulting file, errors are not fatal */ - if (dosync && (archDumpFormat == archNull)) + if (dosync) (void) fsync_fname(filename, false); } @@ -699,14 +663,12 @@ main(int argc, char *argv[]) static void help(void) { - printf(_("%s exports a PostgreSQL database cluster as an SQL script or to other formats.\n\n"), progname); + printf(_("%s exports a PostgreSQL database cluster as an SQL script.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]...\n"), progname); printf(_("\nGeneral options:\n")); printf(_(" -f, --file=FILENAME output file name\n")); - printf(_(" -F, --format=c|d|t|p output file format (custom, directory, tar,\n" - " plain text (default))\n")); printf(_(" -v, --verbose verbose mode\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n")); @@ -1013,6 +975,9 @@ dumpRoles(PGconn *conn) * We do it this way because config settings for roles could mention the * names of other roles. */ + if (PQntuples(res) > 0) + fprintf(OPF, "\n--\n-- User Configurations\n--\n"); + for (i = 0; i < PQntuples(res); i++) dumpUserConfig(conn, PQgetvalue(res, i, i_rolname)); @@ -1526,7 +1491,6 @@ dumpUserConfig(PGconn *conn, const char *username) { PQExpBuffer buf = createPQExpBuffer(); PGresult *res; - static bool header_done = false; printfPQExpBuffer(buf, "SELECT unnest(setconfig) FROM pg_db_role_setting " "WHERE setdatabase = 0 AND setrole = " @@ -1538,13 +1502,7 @@ dumpUserConfig(PGconn *conn, const char *username) res = executeQuery(conn, buf->data); if (PQntuples(res) > 0) - { - if (!header_done) - fprintf(OPF, "\n--\n-- User Configurations\n--\n"); - header_done = true; - fprintf(OPF, "\n--\n-- User Config \"%s\"\n--\n\n", username); - } for (int i = 0; i < PQntuples(res); i++) { @@ -1618,13 +1576,10 @@ expand_dbname_patterns(PGconn *conn, * Dump contents of databases. */ static void -dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) +dumpDatabases(PGconn *conn) { PGresult *res; int i; - char db_subdir[MAXPGPATH]; - char dbfilepath[MAXPGPATH]; - FILE *map_file = NULL; /* * Skip databases marked not datallowconn, since we'd be unable to connect @@ -1638,42 +1593,18 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) * doesn't have some failure mode with --clean. */ res = executeQuery(conn, - "SELECT datname, oid " + "SELECT datname " "FROM pg_database d " "WHERE datallowconn AND datconnlimit != -2 " "ORDER BY (datname <> 'template1'), datname"); - if (archDumpFormat == archNull && PQntuples(res) > 0) + if (PQntuples(res) > 0) fprintf(OPF, "--\n-- Databases\n--\n\n"); - /* - * If directory/tar/custom format is specified, create a subdirectory - * under the main directory and each database dump file or subdirectory - * will be created in that subdirectory by pg_dump. - */ - if (archDumpFormat != archNull) - { - char map_file_path[MAXPGPATH]; - - snprintf(db_subdir, MAXPGPATH, "%s/databases", filename); - - /* Create a subdirectory with 'databases' name under main directory. 
*/ - if (mkdir(db_subdir, pg_dir_create_mode) != 0) - pg_fatal("could not create directory \"%s\": %m", db_subdir); - - snprintf(map_file_path, MAXPGPATH, "%s/map.dat", filename); - - /* Create a map file (to store dboid and dbname) */ - map_file = fopen(map_file_path, PG_BINARY_W); - if (!map_file) - pg_fatal("could not open file \"%s\": %m", map_file_path); - } - for (i = 0; i < PQntuples(res); i++) { char *dbname = PQgetvalue(res, i, 0); - char *oid = PQgetvalue(res, i, 1); - const char *create_opts = ""; + const char *create_opts; int ret; /* Skip template0, even if it's not marked !datallowconn. */ @@ -1687,27 +1618,9 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) continue; } - /* - * If this is not a plain format dump, then append dboid and dbname to - * the map.dat file. - */ - if (archDumpFormat != archNull) - { - if (archDumpFormat == archCustom) - snprintf(dbfilepath, MAXPGPATH, "\"%s\"/\"%s\".dmp", db_subdir, oid); - else if (archDumpFormat == archTar) - snprintf(dbfilepath, MAXPGPATH, "\"%s\"/\"%s\".tar", db_subdir, oid); - else - snprintf(dbfilepath, MAXPGPATH, "\"%s\"/\"%s\"", db_subdir, oid); - - /* Put one line entry for dboid and dbname in map file. */ - fprintf(map_file, "%s %s\n", oid, dbname); - } - pg_log_info("dumping database \"%s\"", dbname); - if (archDumpFormat == archNull) - fprintf(OPF, "--\n-- Database \"%s\" dump\n--\n\n", dbname); + fprintf(OPF, "--\n-- Database \"%s\" dump\n--\n\n", dbname); /* * We assume that "template1" and "postgres" already exist in the @@ -1721,9 +1634,12 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) { if (output_clean) create_opts = "--clean --create"; - /* Since pg_dump won't emit a \connect command, we must */ - else if (archDumpFormat == archNull) + else + { + create_opts = ""; + /* Since pg_dump won't emit a \connect command, we must */ fprintf(OPF, "\\connect %s\n\n", dbname); + } } else create_opts = "--create"; @@ -1731,30 +1647,19 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) if (filename) fclose(OPF); - ret = runPgDump(dbname, create_opts, dbfilepath, archDumpFormat); + ret = runPgDump(dbname, create_opts); if (ret != 0) pg_fatal("pg_dump failed on database \"%s\", exiting", dbname); if (filename) { - char global_path[MAXPGPATH]; - - if (archDumpFormat != archNull) - snprintf(global_path, MAXPGPATH, "%s/global.dat", filename); - else - snprintf(global_path, MAXPGPATH, "%s", filename); - - OPF = fopen(global_path, PG_BINARY_A); + OPF = fopen(filename, PG_BINARY_A); if (!OPF) pg_fatal("could not re-open the output file \"%s\": %m", - global_path); + filename); } } - /* Close map file */ - if (archDumpFormat != archNull) - fclose(map_file); - PQclear(res); } @@ -1764,8 +1669,7 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) * Run pg_dump on dbname, with specified options. */ static int -runPgDump(const char *dbname, const char *create_opts, char *dbfile, - ArchiveFormat archDumpFormat) +runPgDump(const char *dbname, const char *create_opts) { PQExpBufferData connstrbuf; PQExpBufferData cmd; @@ -1774,36 +1678,17 @@ runPgDump(const char *dbname, const char *create_opts, char *dbfile, initPQExpBuffer(&connstrbuf); initPQExpBuffer(&cmd); + printfPQExpBuffer(&cmd, "\"%s\" %s %s", pg_dump_bin, + pgdumpopts->data, create_opts); + /* - * If this is not a plain format dump, then append file name and dump - * format to the pg_dump command to get archive dump. + * If we have a filename, use the undocumented plain-append pg_dump + * format. 
*/ - if (archDumpFormat != archNull) - { - printfPQExpBuffer(&cmd, "\"%s\" -f %s %s", pg_dump_bin, - dbfile, create_opts); - - if (archDumpFormat == archDirectory) - appendPQExpBufferStr(&cmd, " --format=directory "); - else if (archDumpFormat == archCustom) - appendPQExpBufferStr(&cmd, " --format=custom "); - else if (archDumpFormat == archTar) - appendPQExpBufferStr(&cmd, " --format=tar "); - } + if (filename) + appendPQExpBufferStr(&cmd, " -Fa "); else - { - printfPQExpBuffer(&cmd, "\"%s\" %s %s", pg_dump_bin, - pgdumpopts->data, create_opts); - - /* - * If we have a filename, use the undocumented plain-append pg_dump - * format. - */ - if (filename) - appendPQExpBufferStr(&cmd, " -Fa "); - else - appendPQExpBufferStr(&cmd, " -Fp "); - } + appendPQExpBufferStr(&cmd, " -Fp "); /* * Append the database name to the already-constructed stem of connection @@ -1948,36 +1833,3 @@ read_dumpall_filters(const char *filename, SimpleStringList *pattern) filter_free(&fstate); } - -/* - * parseDumpFormat - * - * This will validate dump formats. - */ -static ArchiveFormat -parseDumpFormat(const char *format) -{ - ArchiveFormat archDumpFormat; - - if (pg_strcasecmp(format, "c") == 0) - archDumpFormat = archCustom; - else if (pg_strcasecmp(format, "custom") == 0) - archDumpFormat = archCustom; - else if (pg_strcasecmp(format, "d") == 0) - archDumpFormat = archDirectory; - else if (pg_strcasecmp(format, "directory") == 0) - archDumpFormat = archDirectory; - else if (pg_strcasecmp(format, "p") == 0) - archDumpFormat = archNull; - else if (pg_strcasecmp(format, "plain") == 0) - archDumpFormat = archNull; - else if (pg_strcasecmp(format, "t") == 0) - archDumpFormat = archTar; - else if (pg_strcasecmp(format, "tar") == 0) - archDumpFormat = archTar; - else - pg_fatal("unrecognized output format \"%s\"; please specify \"c\", \"d\", \"p\", or \"t\"", - format); - - return archDumpFormat; -} diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index 6ef789cb06d63..b4e1acdb63fbb 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -2,7 +2,7 @@ * * pg_restore.c * pg_restore is an utility extracting postgres database definitions - * from a backup archive created by pg_dump/pg_dumpall using the archiver + * from a backup archive created by pg_dump using the archiver * interface. 
* * pg_restore will read the backup archive and @@ -41,15 +41,11 @@ #include "postgres_fe.h" #include <ctype.h> -#include <sys/stat.h> #ifdef HAVE_TERMIOS_H #include <termios.h> #endif -#include "common/string.h" -#include "connectdb.h" #include "fe_utils/option_utils.h" -#include "fe_utils/string_utils.h" #include "filter.h" #include "getopt_long.h" #include "parallel.h" @@ -57,43 +53,18 @@ static void usage(const char *progname); static void read_restore_filters(const char *filename, RestoreOptions *opts); -static bool file_exists_in_directory(const char *dir, const char *filename); -static int restore_one_database(const char *inputFileSpec, RestoreOptions *opts, - int numWorkers, bool append_data, int num); -static int read_one_statement(StringInfo inBuf, FILE *pfile); -static int restore_all_databases(PGconn *conn, const char *dumpdirpath, - SimpleStringList db_exclude_patterns, RestoreOptions *opts, int numWorkers); -static int process_global_sql_commands(PGconn *conn, const char *dumpdirpath, - const char *outfile); -static void copy_or_print_global_file(const char *outfile, FILE *pfile); -static int get_dbnames_list_to_restore(PGconn *conn, - SimplePtrList *dbname_oid_list, - SimpleStringList db_exclude_patterns); -static int get_dbname_oid_list_from_mfile(const char *dumpdirpath, - SimplePtrList *dbname_oid_list); - -/* - * Stores a database OID and the corresponding name. - */ -typedef struct DbOidName -{ - Oid oid; - char str[FLEXIBLE_ARRAY_MEMBER]; /* null-terminated string here */ -} DbOidName; - int main(int argc, char **argv) { RestoreOptions *opts; int c; + int exit_code; int numWorkers = 1; + Archive *AH; char *inputFileSpec; bool data_only = false; bool schema_only = false; - int n_errors = 0; - bool globals_only = false; - SimpleStringList db_exclude_patterns = {NULL, NULL}; static int disable_triggers = 0; static int enable_row_security = 0; static int if_exists = 0; @@ -119,7 +90,6 @@ main(int argc, char **argv) {"clean", 0, NULL, 'c'}, {"create", 0, NULL, 'C'}, {"data-only", 0, NULL, 'a'}, - {"globals-only", 0, NULL, 'g'}, {"dbname", 1, NULL, 'd'}, {"exit-on-error", 0, NULL, 'e'}, {"exclude-schema", 1, NULL, 'N'}, @@ -174,7 +144,6 @@ main(int argc, char **argv) {"with-statistics", no_argument, &with_statistics, 1}, {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 4}, - {"exclude-database", required_argument, NULL, 6}, {NULL, 0, NULL, 0} }; @@ -203,7 +172,7 @@ main(int argc, char **argv) } } - while ((c = getopt_long(argc, argv, "acCd:ef:F:gh:I:j:lL:n:N:Op:P:RsS:t:T:U:vwWx1", + while ((c = getopt_long(argc, argv, "acCd:ef:F:h:I:j:lL:n:N:Op:P:RsS:t:T:U:vwWx1", cmdopts, NULL)) != -1) { switch (c) @@ -230,14 +199,11 @@ main(int argc, char **argv) if (strlen(optarg) != 0) opts->formatName = pg_strdup(optarg); break; - case 'g': - /* restore only global.dat file from directory */ - globals_only = true; - break; case 'h': if (strlen(optarg) != 0) opts->cparams.pghost = pg_strdup(optarg); break; + case 'j': /* number of restore jobs */ if (!option_parse_int(optarg, "-j/--jobs", 1, PG_MAX_JOBS, @@ -352,9 +318,6 @@ main(int argc, char **argv) exit(1); opts->exit_on_error = true; break; - case 6: /* database patterns to skip */ - simple_string_list_append(&db_exclude_patterns, optarg); - break; default: /* getopt_long already emitted a complaint */ @@ -382,13 +345,6 @@ main(int argc, char **argv) if (!opts->cparams.dbname && !opts->filename && !opts->tocSummary) pg_fatal("one of -d/--dbname and -f/--file must be specified"); - if (db_exclude_patterns.head != NULL && 
globals_only) - { - pg_log_error("option --exclude-database cannot be used together with -g/--globals-only"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - exit_nicely(1); - } - /* Should get at most one of -d and -f, else user is confused */ if (opts->cparams.dbname) { @@ -496,114 +452,6 @@ main(int argc, char **argv) opts->formatName); } - /* - * If toc.dat file is not present in the current path, then check for - * global.dat. If global.dat file is present, then restore all the - * databases from map.dat (if it exists), but skip restoring those - * matching --exclude-database patterns. - */ - if (inputFileSpec != NULL && !file_exists_in_directory(inputFileSpec, "toc.dat") && - file_exists_in_directory(inputFileSpec, "global.dat")) - { - PGconn *conn = NULL; /* Connection to restore global sql - * commands. */ - - /* - * Can only use --list or --use-list options with a single database - * dump. - */ - if (opts->tocSummary) - pg_fatal("option -l/--list cannot be used when restoring an archive created by pg_dumpall"); - else if (opts->tocFile) - pg_fatal("option -L/--use-list cannot be used when restoring an archive created by pg_dumpall"); - - /* - * To restore from a pg_dumpall archive, -C (create database) option - * must be specified unless we are only restoring globals. - */ - if (!globals_only && opts->createDB != 1) - { - pg_log_error("option -C/--create must be specified when restoring an archive created by pg_dumpall"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - pg_log_error_hint("Individual databases can be restored using their specific archives."); - exit_nicely(1); - } - - /* - * Connect to the database to execute global sql commands from - * global.dat file. - */ - if (opts->cparams.dbname) - { - conn = ConnectDatabase(opts->cparams.dbname, NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - - - if (!conn) - pg_fatal("could not connect to database \"%s\"", opts->cparams.dbname); - } - - /* If globals-only, then return from here. */ - if (globals_only) - { - /* - * Open global.dat file and execute/append all the global sql - * commands. - */ - n_errors = process_global_sql_commands(conn, inputFileSpec, - opts->filename); - - if (conn) - PQfinish(conn); - - pg_log_info("database restoring skipped because option -g/--globals-only was specified"); - } - else - { - /* Now restore all the databases from map.dat */ - n_errors = restore_all_databases(conn, inputFileSpec, db_exclude_patterns, - opts, numWorkers); - } - - /* Free db pattern list. */ - simple_string_list_destroy(&db_exclude_patterns); - } - else /* process if global.dat file does not exist. */ - { - if (db_exclude_patterns.head != NULL) - pg_fatal("option --exclude-database can be used only when restoring an archive created by pg_dumpall"); - - if (globals_only) - pg_fatal("option -g/--globals-only can be used only when restoring an archive created by pg_dumpall"); - - n_errors = restore_one_database(inputFileSpec, opts, numWorkers, false, 0); - } - - /* Done, print a summary of ignored errors during restore. */ - if (n_errors) - { - pg_log_warning("errors ignored on restore: %d", n_errors); - return 1; - } - - return 0; -} - -/* - * restore_one_database - * - * This will restore one database using toc.dat file. - * - * returns the number of errors while doing restore. 
- */ -static int -restore_one_database(const char *inputFileSpec, RestoreOptions *opts, - int numWorkers, bool append_data, int num) -{ - Archive *AH; - int n_errors; - AH = OpenArchive(inputFileSpec, opts->format); SetArchiveOptions(AH, NULL, opts); @@ -611,15 +459,9 @@ restore_one_database(const char *inputFileSpec, RestoreOptions *opts, /* * We don't have a connection yet but that doesn't matter. The connection * is initialized to NULL and if we terminate through exit_nicely() while - * it's still NULL, the cleanup function will just be a no-op. If we are - * restoring multiple databases, then only update AX handle for cleanup as - * the previous entry was already in the array and we had closed previous - * connection, so we can use the same array slot. + * it's still NULL, the cleanup function will just be a no-op. */ - if (!append_data || num == 0) - on_exit_close_archive(AH); - else - replace_on_exit_close_archive(AH); + on_exit_close_archive(AH); /* Let the archiver know how noisy to be */ AH->verbose = opts->verbose; @@ -639,21 +481,25 @@ restore_one_database(const char *inputFileSpec, RestoreOptions *opts, else { ProcessArchiveRestoreOptions(AH); - RestoreArchive(AH, append_data); + RestoreArchive(AH); } - n_errors = AH->n_errors; + /* done, print a summary of ignored errors */ + if (AH->n_errors) + pg_log_warning("errors ignored on restore: %d", AH->n_errors); /* AH may be freed in CloseArchive? */ + exit_code = AH->n_errors ? 1 : 0; + CloseArchive(AH); - return n_errors; + return exit_code; } static void usage(const char *progname) { - printf(_("%s restores PostgreSQL databases from archives created by pg_dump or pg_dumpall.\n\n"), progname); + printf(_("%s restores a PostgreSQL database from an archive created by pg_dump.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]... 
[FILE]\n"), progname); @@ -671,7 +517,6 @@ usage(const char *progname) printf(_(" -c, --clean clean (drop) database objects before recreating\n")); printf(_(" -C, --create create the target database\n")); printf(_(" -e, --exit-on-error exit on error, default is to continue\n")); - printf(_(" -g, --globals-only restore only global objects, no databases\n")); printf(_(" -I, --index=NAME restore named index\n")); printf(_(" -j, --jobs=NUM use this many parallel jobs to restore\n")); printf(_(" -L, --use-list=FILENAME use table of contents from this file for\n" @@ -688,7 +533,6 @@ usage(const char *progname) printf(_(" -1, --single-transaction restore as a single transaction\n")); printf(_(" --disable-triggers disable triggers during data-only restore\n")); printf(_(" --enable-row-security enable row security\n")); - printf(_(" --exclude-database=PATTERN do not restore the specified database(s)\n")); printf(_(" --filter=FILENAME restore or skip objects based on expressions\n" " in FILENAME\n")); printf(_(" --if-exists use IF EXISTS when dropping objects\n")); @@ -725,8 +569,8 @@ usage(const char *progname) printf(_(" --role=ROLENAME do SET ROLE before restore\n")); printf(_("\n" - "The options -I, -n, -N, -P, -t, -T, --section, and --exclude-database can be\n" - "combined and specified multiple times to select multiple objects.\n")); + "The options -I, -n, -N, -P, -t, -T, and --section can be combined and specified\n" + "multiple times to select multiple objects.\n")); printf(_("\nIf no input file name is supplied, then standard input is used.\n\n")); printf(_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); @@ -831,585 +675,3 @@ read_restore_filters(const char *filename, RestoreOptions *opts) filter_free(&fstate); } - -/* - * file_exists_in_directory - * - * Returns true if the file exists in the given directory. - */ -static bool -file_exists_in_directory(const char *dir, const char *filename) -{ - struct stat st; - char buf[MAXPGPATH]; - - if (snprintf(buf, MAXPGPATH, "%s/%s", dir, filename) >= MAXPGPATH) - pg_fatal("directory name too long: \"%s\"", dir); - - return (stat(buf, &st) == 0 && S_ISREG(st.st_mode)); -} - -/* - * read_one_statement - * - * This will start reading from passed file pointer using fgetc and read till - * semicolon(sql statement terminator for global.dat file) - * - * EOF is returned if end-of-file input is seen; time to shut down. - */ - -static int -read_one_statement(StringInfo inBuf, FILE *pfile) -{ - int c; /* character read from getc() */ - int m; - - StringInfoData q; - - initStringInfo(&q); - - resetStringInfo(inBuf); - - /* - * Read characters until EOF or the appropriate delimiter is seen. - */ - while ((c = fgetc(pfile)) != EOF) - { - if (c != '\'' && c != '"' && c != '\n' && c != ';') - { - appendStringInfoChar(inBuf, (char) c); - while ((c = fgetc(pfile)) != EOF) - { - if (c != '\'' && c != '"' && c != ';' && c != '\n') - appendStringInfoChar(inBuf, (char) c); - else - break; - } - } - - if (c == '\'' || c == '"') - { - appendStringInfoChar(&q, (char) c); - m = c; - - while ((c = fgetc(pfile)) != EOF) - { - appendStringInfoChar(&q, (char) c); - - if (c == m) - { - appendStringInfoString(inBuf, q.data); - resetStringInfo(&q); - break; - } - } - } - - if (c == ';') - { - appendStringInfoChar(inBuf, (char) ';'); - break; - } - - if (c == '\n') - appendStringInfoChar(inBuf, (char) '\n'); - } - - pg_free(q.data); - - /* No input before EOF signal means time to quit. 
*/ - if (c == EOF && inBuf->len == 0) - return EOF; - - /* return something that's not EOF */ - return 'Q'; -} - -/* - * get_dbnames_list_to_restore - * - * This will mark for skipping any entries from dbname_oid_list that pattern match an - * entry in the db_exclude_patterns list. - * - * Returns the number of database to be restored. - * - */ -static int -get_dbnames_list_to_restore(PGconn *conn, - SimplePtrList *dbname_oid_list, - SimpleStringList db_exclude_patterns) -{ - int count_db = 0; - PQExpBuffer query; - PGresult *res; - - query = createPQExpBuffer(); - - if (!conn) - pg_log_info("considering PATTERN as NAME for --exclude-database option as no database connection while doing pg_restore"); - - /* - * Process one by one all dbnames and if specified to skip restoring, then - * remove dbname from list. - */ - for (SimplePtrListCell *db_cell = dbname_oid_list->head; - db_cell; db_cell = db_cell->next) - { - DbOidName *dbidname = (DbOidName *) db_cell->ptr; - bool skip_db_restore = false; - PQExpBuffer db_lit = createPQExpBuffer(); - - appendStringLiteralConn(db_lit, dbidname->str, conn); - - for (SimpleStringListCell *pat_cell = db_exclude_patterns.head; pat_cell; pat_cell = pat_cell->next) - { - /* - * If there is an exact match then we don't need to try a pattern - * match - */ - if (pg_strcasecmp(dbidname->str, pat_cell->val) == 0) - skip_db_restore = true; - /* Otherwise, try a pattern match if there is a connection */ - else if (conn) - { - int dotcnt; - - appendPQExpBufferStr(query, "SELECT 1 "); - processSQLNamePattern(conn, query, pat_cell->val, false, - false, NULL, db_lit->data, - NULL, NULL, NULL, &dotcnt); - - if (dotcnt > 0) - { - pg_log_error("improper qualified name (too many dotted names): %s", - dbidname->str); - PQfinish(conn); - exit_nicely(1); - } - - res = executeQuery(conn, query->data); - - if ((PQresultStatus(res) == PGRES_TUPLES_OK) && PQntuples(res)) - { - skip_db_restore = true; - pg_log_info("database name \"%s\" matches exclude pattern \"%s\"", dbidname->str, pat_cell->val); - } - - PQclear(res); - resetPQExpBuffer(query); - } - - if (skip_db_restore) - break; - } - - destroyPQExpBuffer(db_lit); - - /* - * Mark db to be skipped or increment the counter of dbs to be - * restored - */ - if (skip_db_restore) - { - pg_log_info("excluding database \"%s\"", dbidname->str); - dbidname->oid = InvalidOid; - } - else - { - count_db++; - } - } - - destroyPQExpBuffer(query); - - return count_db; -} - -/* - * get_dbname_oid_list_from_mfile - * - * Open map.dat file and read line by line and then prepare a list of database - * names and corresponding db_oid. - * - * Returns, total number of database names in map.dat file. - */ -static int -get_dbname_oid_list_from_mfile(const char *dumpdirpath, SimplePtrList *dbname_oid_list) -{ - StringInfoData linebuf; - FILE *pfile; - char map_file_path[MAXPGPATH]; - int count = 0; - - - /* - * If there is only global.dat file in dump, then return from here as - * there is no database to restore. - */ - if (!file_exists_in_directory(dumpdirpath, "map.dat")) - { - pg_log_info("database restoring is skipped because file \"%s\" does not exist in directory \"%s\"", "map.dat", dumpdirpath); - return 0; - } - - snprintf(map_file_path, MAXPGPATH, "%s/map.dat", dumpdirpath); - - /* Open map.dat file. */ - pfile = fopen(map_file_path, PG_BINARY_R); - - if (pfile == NULL) - pg_fatal("could not open file \"%s\": %m", map_file_path); - - initStringInfo(&linebuf); - - /* Append all the dbname/db_oid combinations to the list. 
*/ - while (pg_get_line_buf(pfile, &linebuf)) - { - Oid db_oid = InvalidOid; - char *dbname; - DbOidName *dbidname; - int namelen; - char *p = linebuf.data; - - /* Extract dboid. */ - while (isdigit((unsigned char) *p)) - p++; - if (p > linebuf.data && *p == ' ') - { - sscanf(linebuf.data, "%u", &db_oid); - p++; - } - - /* dbname is the rest of the line */ - dbname = p; - namelen = strlen(dbname); - - /* Report error and exit if the file has any corrupted data. */ - if (!OidIsValid(db_oid) || namelen <= 1) - pg_fatal("invalid entry in file \"%s\" on line %d", map_file_path, - count + 1); - - pg_log_info("found database \"%s\" (OID: %u) in file \"%s\"", - dbname, db_oid, map_file_path); - - dbidname = pg_malloc(offsetof(DbOidName, str) + namelen + 1); - dbidname->oid = db_oid; - strlcpy(dbidname->str, dbname, namelen); - - simple_ptr_list_append(dbname_oid_list, dbidname); - count++; - } - - /* Close map.dat file. */ - fclose(pfile); - - return count; -} - -/* - * restore_all_databases - * - * This will restore databases those dumps are present in - * directory based on map.dat file mapping. - * - * This will skip restoring for databases that are specified with - * exclude-database option. - * - * returns, number of errors while doing restore. - */ -static int -restore_all_databases(PGconn *conn, const char *dumpdirpath, - SimpleStringList db_exclude_patterns, RestoreOptions *opts, - int numWorkers) -{ - SimplePtrList dbname_oid_list = {NULL, NULL}; - int num_db_restore = 0; - int num_total_db; - int n_errors_total; - int count = 0; - char *connected_db = NULL; - bool dumpData = opts->dumpData; - bool dumpSchema = opts->dumpSchema; - bool dumpStatistics = opts->dumpSchema; - - /* Save db name to reuse it for all the database. */ - if (opts->cparams.dbname) - connected_db = opts->cparams.dbname; - - num_total_db = get_dbname_oid_list_from_mfile(dumpdirpath, &dbname_oid_list); - - /* If map.dat has no entries, return after processing global.dat */ - if (dbname_oid_list.head == NULL) - return process_global_sql_commands(conn, dumpdirpath, opts->filename); - - pg_log_info(ngettext("found %d database name in \"%s\"", - "found %d database names in \"%s\"", - num_total_db), - num_total_db, "map.dat"); - - if (!conn) - { - pg_log_info("trying to connect to database \"%s\"", "postgres"); - - conn = ConnectDatabase("postgres", NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - - /* Try with template1. */ - if (!conn) - { - pg_log_info("trying to connect to database \"%s\"", "template1"); - - conn = ConnectDatabase("template1", NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - } - } - - /* - * filter the db list according to the exclude patterns - */ - num_db_restore = get_dbnames_list_to_restore(conn, &dbname_oid_list, - db_exclude_patterns); - - /* Open global.dat file and execute/append all the global sql commands. */ - n_errors_total = process_global_sql_commands(conn, dumpdirpath, opts->filename); - - /* Close the db connection as we are done with globals and patterns. */ - if (conn) - PQfinish(conn); - - /* Exit if no db needs to be restored. 
*/ - if (dbname_oid_list.head == NULL || num_db_restore == 0) - { - pg_log_info(ngettext("no database needs restoring out of %d database", - "no database needs restoring out of %d databases", num_total_db), - num_total_db); - return n_errors_total; - } - - pg_log_info("need to restore %d databases out of %d databases", num_db_restore, num_total_db); - - /* - * We have a list of databases to restore after processing the - * exclude-database switch(es). Now we can restore them one by one. - */ - for (SimplePtrListCell *db_cell = dbname_oid_list.head; - db_cell; db_cell = db_cell->next) - { - DbOidName *dbidname = (DbOidName *) db_cell->ptr; - char subdirpath[MAXPGPATH]; - char subdirdbpath[MAXPGPATH]; - char dbfilename[MAXPGPATH]; - int n_errors; - - /* ignore dbs marked for skipping */ - if (dbidname->oid == InvalidOid) - continue; - - /* - * We need to reset override_dbname so that objects can be restored - * into an already created database. (used with -d/--dbname option) - */ - if (opts->cparams.override_dbname) - { - pfree(opts->cparams.override_dbname); - opts->cparams.override_dbname = NULL; - } - - snprintf(subdirdbpath, MAXPGPATH, "%s/databases", dumpdirpath); - - /* - * Look for the database dump file/dir. If there is an {oid}.tar or - * {oid}.dmp file, use it. Otherwise try to use a directory called - * {oid} - */ - snprintf(dbfilename, MAXPGPATH, "%u.tar", dbidname->oid); - if (file_exists_in_directory(subdirdbpath, dbfilename)) - snprintf(subdirpath, MAXPGPATH, "%s/databases/%u.tar", dumpdirpath, dbidname->oid); - else - { - snprintf(dbfilename, MAXPGPATH, "%u.dmp", dbidname->oid); - - if (file_exists_in_directory(subdirdbpath, dbfilename)) - snprintf(subdirpath, MAXPGPATH, "%s/databases/%u.dmp", dumpdirpath, dbidname->oid); - else - snprintf(subdirpath, MAXPGPATH, "%s/databases/%u", dumpdirpath, dbidname->oid); - } - - pg_log_info("restoring database \"%s\"", dbidname->str); - - /* If database is already created, then don't set createDB flag. */ - if (opts->cparams.dbname) - { - PGconn *test_conn; - - test_conn = ConnectDatabase(dbidname->str, NULL, opts->cparams.pghost, - opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, - false, progname, NULL, NULL, NULL, NULL); - if (test_conn) - { - PQfinish(test_conn); - - /* Use already created database for connection. */ - opts->createDB = 0; - opts->cparams.dbname = dbidname->str; - } - else - { - /* we'll have to create it */ - opts->createDB = 1; - opts->cparams.dbname = connected_db; - } - } - - /* - * Reset flags - might have been reset in pg_backup_archiver.c by the - * previous restore. - */ - opts->dumpData = dumpData; - opts->dumpSchema = dumpSchema; - opts->dumpStatistics = dumpStatistics; - - /* Restore the single database. */ - n_errors = restore_one_database(subdirpath, opts, numWorkers, true, count); - - /* Print a summary of ignored errors during single database restore. */ - if (n_errors) - { - n_errors_total += n_errors; - pg_log_warning("errors ignored on database \"%s\" restore: %d", dbidname->str, n_errors); - } - - count++; - } - - /* Log number of processed databases. */ - pg_log_info("number of restored databases is %d", num_db_restore); - - /* Free dbname and dboid list. */ - simple_ptr_list_destroy(&dbname_oid_list); - - return n_errors_total; -} - -/* - * process_global_sql_commands - * - * Open global.dat and execute or copy the sql commands one by one. - * - * If outfile is not NULL, copy all sql commands into outfile rather than - * executing them. 
- * - * Returns the number of errors while processing global.dat - */ -static int -process_global_sql_commands(PGconn *conn, const char *dumpdirpath, const char *outfile) -{ - char global_file_path[MAXPGPATH]; - PGresult *result; - StringInfoData sqlstatement, - user_create; - FILE *pfile; - int n_errors = 0; - - snprintf(global_file_path, MAXPGPATH, "%s/global.dat", dumpdirpath); - - /* Open global.dat file. */ - pfile = fopen(global_file_path, PG_BINARY_R); - - if (pfile == NULL) - pg_fatal("could not open file \"%s\": %m", global_file_path); - - /* - * If outfile is given, then just copy all global.dat file data into - * outfile. - */ - if (outfile) - { - copy_or_print_global_file(outfile, pfile); - return 0; - } - - /* Init sqlstatement to append commands. */ - initStringInfo(&sqlstatement); - - /* creation statement for our current role */ - initStringInfo(&user_create); - appendStringInfoString(&user_create, "CREATE ROLE "); - /* should use fmtId here, but we don't know the encoding */ - appendStringInfoString(&user_create, PQuser(conn)); - appendStringInfoChar(&user_create, ';'); - - /* Process file till EOF and execute sql statements. */ - while (read_one_statement(&sqlstatement, pfile) != EOF) - { - /* don't try to create the role we are connected as */ - if (strstr(sqlstatement.data, user_create.data)) - continue; - - pg_log_info("executing query: %s", sqlstatement.data); - result = PQexec(conn, sqlstatement.data); - - switch (PQresultStatus(result)) - { - case PGRES_COMMAND_OK: - case PGRES_TUPLES_OK: - case PGRES_EMPTY_QUERY: - break; - default: - n_errors++; - pg_log_error("could not execute query: %s", PQerrorMessage(conn)); - pg_log_error_detail("Command was: %s", sqlstatement.data); - } - PQclear(result); - } - - /* Print a summary of ignored errors during global.dat. */ - if (n_errors) - pg_log_warning(ngettext("ignored %d error in file \"%s\"", - "ignored %d errors in file \"%s\"", n_errors), - n_errors, global_file_path); - fclose(pfile); - - return n_errors; -} - -/* - * copy_or_print_global_file - * - * Copy global.dat into the output file. If "-" is used as outfile, - * then print commands to stdout. - */ -static void -copy_or_print_global_file(const char *outfile, FILE *pfile) -{ - char out_file_path[MAXPGPATH]; - FILE *OPF; - int c; - - /* "-" is used for stdout. */ - if (strcmp(outfile, "-") == 0) - OPF = stdout; - else - { - snprintf(out_file_path, MAXPGPATH, "%s", outfile); - OPF = fopen(out_file_path, PG_BINARY_W); - - if (OPF == NULL) - { - fclose(pfile); - pg_fatal("could not open file: \"%s\"", outfile); - } - } - - /* Append global.dat into output file or print to stdout. */ - while ((c = fgetc(pfile)) != EOF) - fputc(c, OPF); - - fclose(pfile); - - /* Close output file. 
*/ - if (strcmp(outfile, "-") != 0) - fclose(OPF); -} diff --git a/src/bin/pg_dump/t/001_basic.pl b/src/bin/pg_dump/t/001_basic.pl index c3c5fae11eaaf..37d893d5e6a5f 100644 --- a/src/bin/pg_dump/t/001_basic.pl +++ b/src/bin/pg_dump/t/001_basic.pl @@ -237,24 +237,6 @@ 'pg_restore: options -C\/--create and -1\/--single-transaction cannot be used together' ); -command_fails_like( - [ 'pg_restore', '--exclude-database=foo', '--globals-only', '-d', 'xxx' ], - qr/\Qpg_restore: error: option --exclude-database cannot be used together with -g\/--globals-only\E/, - 'pg_restore: option --exclude-database cannot be used together with -g/--globals-only' -); - -command_fails_like( - [ 'pg_restore', '--exclude-database=foo', '-d', 'xxx', 'dumpdir' ], - qr/\Qpg_restore: error: option --exclude-database can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --exclude-database is used in pg_restore with dump of pg_dump' -); - -command_fails_like( - [ 'pg_restore', '--globals-only', '-d', 'xxx', 'dumpdir' ], - qr/\Qpg_restore: error: option -g\/--globals-only can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --globals-only is not used in pg_restore with dump of pg_dump' -); - # also fails for -r and -t, but it seems pointless to add more tests for those. command_fails_like( [ 'pg_dumpall', '--exclude-database=foo', '--globals-only' ], @@ -262,8 +244,4 @@ 'pg_dumpall: option --exclude-database cannot be used together with -g/--globals-only' ); -command_fails_like( - [ 'pg_dumpall', '--format', 'x' ], - qr/\Qpg_dumpall: error: unrecognized output format "x";\E/, - 'pg_dumpall: unrecognized output format'); done_testing(); diff --git a/src/bin/pg_dump/t/006_pg_dumpall.pl b/src/bin/pg_dump/t/006_pg_dumpall.pl deleted file mode 100644 index c274b777586ad..0000000000000 --- a/src/bin/pg_dump/t/006_pg_dumpall.pl +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (c) 2021-2025, PostgreSQL Global Development Group - -use strict; -use warnings FATAL => 'all'; - -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; -use Test::More; - -my $tempdir = PostgreSQL::Test::Utils::tempdir; -my $run_db = 'postgres'; -my $sep = $windows_os ? "\\" : "/"; - -# Tablespace locations used by "restore_tablespace" test case. -my $tablespace1 = "${tempdir}${sep}tbl1"; -my $tablespace2 = "${tempdir}${sep}tbl2"; -mkdir($tablespace1) || die "mkdir $tablespace1 $!"; -mkdir($tablespace2) || die "mkdir $tablespace2 $!"; - -# Scape tablespace locations on Windows. -$tablespace1 = $windows_os ? ($tablespace1 =~ s/\\/\\\\/gr) : $tablespace1; -$tablespace2 = $windows_os ? ($tablespace2 =~ s/\\/\\\\/gr) : $tablespace2; - -# Where pg_dumpall will be executed. -my $node = PostgreSQL::Test::Cluster->new('node'); -$node->init; -$node->start; - - -############################################################### -# Definition of the pg_dumpall test cases to run. -# -# Each of these test cases are named and those names are used for fail -# reporting and also to save the dump and restore information needed for the -# test to assert. -# -# The "setup_sql" is a psql valid script that contains SQL commands to execute -# before of actually execute the tests. The setups are all executed before of -# any test execution. -# -# The "dump_cmd" and "restore_cmd" are the commands that will be executed. The -# "restore_cmd" must have the --file flag to save the restore output so that we -# can assert on it. -# -# The "like" and "unlike" is a regexp that is used to match the pg_restore -# output. 
It must have at least one of then filled per test cases but it also -# can have both. See "excluding_databases" test case for example. -my %pgdumpall_runs = ( - restore_roles => { - setup_sql => ' - CREATE ROLE dumpall WITH ENCRYPTED PASSWORD \'admin\' SUPERUSER; - CREATE ROLE dumpall2 WITH REPLICATION CONNECTION LIMIT 10;', - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/restore_roles", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/restore_roles.sql", - "$tempdir/restore_roles", - ], - like => qr/ - ^\s*\QCREATE ROLE dumpall;\E\s*\n - \s*\QALTER ROLE dumpall WITH SUPERUSER INHERIT NOCREATEROLE NOCREATEDB NOLOGIN NOREPLICATION NOBYPASSRLS PASSWORD 'SCRAM-SHA-256\E - [^']+';\s*\n - \s*\QCREATE ROLE dumpall2;\E - \s*\QALTER ROLE dumpall2 WITH NOSUPERUSER INHERIT NOCREATEROLE NOCREATEDB NOLOGIN REPLICATION NOBYPASSRLS CONNECTION LIMIT 10;\E - /xm - }, - - restore_tablespace => { - setup_sql => " - CREATE ROLE tap; - CREATE TABLESPACE tbl1 OWNER tap LOCATION '$tablespace1'; - CREATE TABLESPACE tbl2 OWNER tap LOCATION '$tablespace2' WITH (seq_page_cost=1.0);", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/restore_tablespace", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/restore_tablespace.sql", - "$tempdir/restore_tablespace", - ], - # Match "E" as optional since it is added on LOCATION when running on - # Windows. - like => qr/^ - \n\QCREATE TABLESPACE tbl1 OWNER tap LOCATION \E(?:E)?\Q'$tablespace1';\E - \n\QCREATE TABLESPACE tbl2 OWNER tap LOCATION \E(?:E)?\Q'$tablespace2';\E - \n\QALTER TABLESPACE tbl2 SET (seq_page_cost=1.0);\E - /xm, - }, - - restore_grants => { - setup_sql => " - CREATE DATABASE tapgrantsdb; - CREATE SCHEMA private; - CREATE SEQUENCE serial START 101; - CREATE FUNCTION fn() RETURNS void AS \$\$ - BEGIN - END; - \$\$ LANGUAGE plpgsql; - CREATE ROLE super; - CREATE ROLE grant1; - CREATE ROLE grant2; - CREATE ROLE grant3; - CREATE ROLE grant4; - CREATE ROLE grant5; - CREATE ROLE grant6; - CREATE ROLE grant7; - CREATE ROLE grant8; - - CREATE TABLE t (id int); - INSERT INTO t VALUES (1), (2), (3), (4); - - GRANT SELECT ON TABLE t TO grant1; - GRANT INSERT ON TABLE t TO grant2; - GRANT ALL PRIVILEGES ON TABLE t to grant3; - GRANT CONNECT, CREATE ON DATABASE tapgrantsdb TO grant4; - GRANT USAGE, CREATE ON SCHEMA private TO grant5; - GRANT USAGE, SELECT, UPDATE ON SEQUENCE serial TO grant6; - GRANT super TO grant7; - GRANT EXECUTE ON FUNCTION fn() TO grant8; - ", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/restore_grants", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/restore_grants.sql", - "$tempdir/restore_grants", - ], - like => qr/^ - \n\QGRANT super TO grant7 WITH INHERIT TRUE GRANTED BY\E - (.*\n)* - \n\QGRANT ALL ON SCHEMA private TO grant5;\E - (.*\n)* - \n\QGRANT ALL ON FUNCTION public.fn() TO grant8;\E - (.*\n)* - \n\QGRANT ALL ON SEQUENCE public.serial TO grant6;\E - (.*\n)* - \n\QGRANT SELECT ON TABLE public.t TO grant1;\E - \n\QGRANT INSERT ON TABLE public.t TO grant2;\E - \n\QGRANT ALL ON TABLE public.t TO grant3;\E - (.*\n)* - \n\QGRANT CREATE,CONNECT ON DATABASE tapgrantsdb TO grant4;\E - /xm, - }, - - excluding_databases => { - setup_sql => 'CREATE DATABASE db1; - \c db1 - CREATE TABLE t1 (id int); - INSERT INTO t1 VALUES (1), (2), (3), (4); - CREATE TABLE t2 (id int); - INSERT INTO t2 
VALUES (1), (2), (3), (4); - - CREATE DATABASE db2; - \c db2 - CREATE TABLE t3 (id int); - INSERT INTO t3 VALUES (1), (2), (3), (4); - CREATE TABLE t4 (id int); - INSERT INTO t4 VALUES (1), (2), (3), (4); - - CREATE DATABASE dbex3; - \c dbex3 - CREATE TABLE t5 (id int); - INSERT INTO t5 VALUES (1), (2), (3), (4); - CREATE TABLE t6 (id int); - INSERT INTO t6 VALUES (1), (2), (3), (4); - - CREATE DATABASE dbex4; - \c dbex4 - CREATE TABLE t7 (id int); - INSERT INTO t7 VALUES (1), (2), (3), (4); - CREATE TABLE t8 (id int); - INSERT INTO t8 VALUES (1), (2), (3), (4); - - CREATE DATABASE db5; - \c db5 - CREATE TABLE t9 (id int); - INSERT INTO t9 VALUES (1), (2), (3), (4); - CREATE TABLE t10 (id int); - INSERT INTO t10 VALUES (1), (2), (3), (4); - ', - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/excluding_databases", - '--exclude-database' => 'dbex*', - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/excluding_databases.sql", - '--exclude-database' => 'db5', - "$tempdir/excluding_databases", - ], - like => qr/^ - \n\QCREATE DATABASE db1\E - (.*\n)* - \n\QCREATE TABLE public.t1 (\E - (.*\n)* - \n\QCREATE TABLE public.t2 (\E - (.*\n)* - \n\QCREATE DATABASE db2\E - (.*\n)* - \n\QCREATE TABLE public.t3 (\E - (.*\n)* - \n\QCREATE TABLE public.t4 (/xm, - unlike => qr/^ - \n\QCREATE DATABASE db3\E - (.*\n)* - \n\QCREATE TABLE public.t5 (\E - (.*\n)* - \n\QCREATE TABLE public.t6 (\E - (.*\n)* - \n\QCREATE DATABASE db4\E - (.*\n)* - \n\QCREATE TABLE public.t7 (\E - (.*\n)* - \n\QCREATE TABLE public.t8 (\E - \n\QCREATE DATABASE db5\E - (.*\n)* - \n\QCREATE TABLE public.t9 (\E - (.*\n)* - \n\QCREATE TABLE public.t10 (\E - /xm, - }, - - format_directory => { - setup_sql => "CREATE TABLE format_directory(a int, b boolean, c text); - INSERT INTO format_directory VALUES (1, true, 'name1'), (2, false, 'name2');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--file' => "$tempdir/format_directory", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'directory', - '--file' => "$tempdir/format_directory.sql", - "$tempdir/format_directory", - ], - like => qr/^\n\QCOPY public.format_directory (a, b, c) FROM stdin;/xm - }, - - format_tar => { - setup_sql => "CREATE TABLE format_tar(a int, b boolean, c text); - INSERT INTO format_tar VALUES (1, false, 'name3'), (2, true, 'name4');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'tar', - '--file' => "$tempdir/format_tar", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'tar', - '--file' => "$tempdir/format_tar.sql", - "$tempdir/format_tar", - ], - like => qr/^\n\QCOPY public.format_tar (a, b, c) FROM stdin;/xm - }, - - format_custom => { - setup_sql => "CREATE TABLE format_custom(a int, b boolean, c text); - INSERT INTO format_custom VALUES (1, false, 'name5'), (2, true, 'name6');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'custom', - '--file' => "$tempdir/format_custom", - ], - restore_cmd => [ - 'pg_restore', '-C', - '--format' => 'custom', - '--file' => "$tempdir/format_custom.sql", - "$tempdir/format_custom", - ], - like => qr/^ \n\QCOPY public.format_custom (a, b, c) FROM stdin;/xm - }, - - dump_globals_only => { - setup_sql => "CREATE TABLE format_dir(a int, b boolean, c text); - INSERT INTO format_dir VALUES (1, false, 'name5'), (2, true, 'name6');", - dump_cmd => [ - 'pg_dumpall', - '--format' => 'directory', - '--globals-only', - '--file' => "$tempdir/dump_globals_only", - ], - restore_cmd => [ - 'pg_restore', '-C', 
'--globals-only', - '--format' => 'directory', - '--file' => "$tempdir/dump_globals_only.sql", - "$tempdir/dump_globals_only", - ], - like => qr/ - ^\s*\QCREATE ROLE dumpall;\E\s*\n - /xm - },); - -# First execute the setup_sql -foreach my $run (sort keys %pgdumpall_runs) -{ - if ($pgdumpall_runs{$run}->{setup_sql}) - { - $node->safe_psql($run_db, $pgdumpall_runs{$run}->{setup_sql}); - } -} - -# Execute the tests -foreach my $run (sort keys %pgdumpall_runs) -{ - # Create a new target cluster to pg_restore each test case run so that we - # don't need to take care of the cleanup from the target cluster after each - # run. - my $target_node = PostgreSQL::Test::Cluster->new("target_$run"); - $target_node->init; - $target_node->start; - - # Dumpall from node cluster. - $node->command_ok(\@{ $pgdumpall_runs{$run}->{dump_cmd} }, - "$run: pg_dumpall runs"); - - # Restore the dump on "target_node" cluster. - my @restore_cmd = ( - @{ $pgdumpall_runs{$run}->{restore_cmd} }, - '--host', $target_node->host, '--port', $target_node->port); - - my ($stdout, $stderr) = run_command(\@restore_cmd); - - # pg_restore --file output file. - my $output_file = slurp_file("$tempdir/${run}.sql"); - - if ( !($pgdumpall_runs{$run}->{like}) - && !($pgdumpall_runs{$run}->{unlike})) - { - die "missing \"like\" or \"unlike\" in test \"$run\""; - } - - if ($pgdumpall_runs{$run}->{like}) - { - like($output_file, $pgdumpall_runs{$run}->{like}, "should dump $run"); - } - - if ($pgdumpall_runs{$run}->{unlike}) - { - unlike( - $output_file, - $pgdumpall_runs{$run}->{unlike}, - "should not dump $run"); - } -} - -# Some negative test case with dump of pg_dumpall and restore using pg_restore -# test case 1: when -C is not used in pg_restore with dump of pg_dumpall -$node->command_fails_like( - [ - 'pg_restore', - "$tempdir/format_custom", - '--format' => 'custom', - '--file' => "$tempdir/error_test.sql", - ], - qr/\Qpg_restore: error: option -C\/--create must be specified when restoring an archive created by pg_dumpall\E/, - 'When -C is not used in pg_restore with dump of pg_dumpall'); - -# test case 2: When --list option is used with dump of pg_dumpall -$node->command_fails_like( - [ - 'pg_restore', - "$tempdir/format_custom", '-C', - '--format' => 'custom', - '--list', - '--file' => "$tempdir/error_test.sql", - ], - qr/\Qpg_restore: error: option -l\/--list cannot be used when restoring an archive created by pg_dumpall\E/, - 'When --list is used in pg_restore with dump of pg_dumpall'); - -# test case 3: When non-exist database is given with -d option -$node->command_fails_like( - [ - 'pg_restore', - "$tempdir/format_custom", '-C', - '--format' => 'custom', - '-d' => 'dbpq', - ], - qr/\Qpg_restore: error: could not connect to database "dbpq"\E/, - 'When non-existent database is given with -d option in pg_restore with dump of pg_dumpall' -); - -$node->stop('fast'); - -done_testing(); From 412036c22d6a605340dbe397da1fb12fccd3897f Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 30 Jul 2025 10:48:41 -0500 Subject: [PATCH 41/67] Teach pg_upgrade to handle in-place tablespaces. Presently, pg_upgrade assumes that all non-default tablespaces don't move to different directories during upgrade. Unfortunately, this isn't true for in-place tablespaces, which move to the new cluster's pg_tblspc directory. This commit teaches pg_upgrade to handle in-place tablespaces by retrieving the tablespace directories for both the old and new clusters. 
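For reference, an in-place tablespace is one created with an empty LOCATION string; its directory lives under the cluster's own pg_tblspc, keyed by the tablespace OID, which is why its path necessarily differs between the old and new clusters. A minimal sketch of such a setup (assuming a superuser session; it mirrors the test added to 006_transfer_modes.pl below):

    SET allow_in_place_tablespaces = true;
    CREATE TABLESPACE inplc_tblspc LOCATION '';
    -- the directory is created as $PGDATA/pg_tblspc/<tablespace oid>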
In turn, we can relax the prohibition on non-default tablespaces for same-version upgrades, i.e., if all non-default tablespaces are in-place, pg_upgrade may proceed. This change is primarily intended to enable additional pg_upgrade testing with non-default tablespaces, as is done in 006_transfer_modes.pl. Reviewed-by: Corey Huinker Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/aA_uBLYMUs5D66Nb%40nathan --- src/bin/pg_upgrade/check.c | 20 +++---- src/bin/pg_upgrade/info.c | 38 +++++++++++-- src/bin/pg_upgrade/parallel.c | 11 ++-- src/bin/pg_upgrade/pg_upgrade.h | 8 +-- src/bin/pg_upgrade/relfilenumber.c | 57 +++++++++---------- src/bin/pg_upgrade/t/006_transfer_modes.pl | 35 ++++++++++-- src/bin/pg_upgrade/tablespace.c | 65 +++++++++++++++++----- 7 files changed, 164 insertions(+), 70 deletions(-) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 5e6403f07731b..310f53c55771b 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -956,12 +956,12 @@ check_for_new_tablespace_dir(void) prep_status("Checking for new cluster tablespace directories"); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < new_cluster.num_tablespaces; tblnum++) { struct stat statbuf; snprintf(new_tablespace_dir, MAXPGPATH, "%s%s", - os_info.old_tablespaces[tblnum], + new_cluster.tablespaces[tblnum], new_cluster.tablespace_suffix); if (stat(new_tablespace_dir, &statbuf) == 0 || errno != ENOENT) @@ -1013,17 +1013,17 @@ create_script_for_old_cluster_deletion(char **deletion_script_file_name) * directory. We can't create a proper old cluster delete script in that * case. */ - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < new_cluster.num_tablespaces; tblnum++) { - char old_tablespace_dir[MAXPGPATH]; + char new_tablespace_dir[MAXPGPATH]; - strlcpy(old_tablespace_dir, os_info.old_tablespaces[tblnum], MAXPGPATH); - canonicalize_path(old_tablespace_dir); - if (path_is_prefix_of_path(old_cluster_pgdata, old_tablespace_dir)) + strlcpy(new_tablespace_dir, new_cluster.tablespaces[tblnum], MAXPGPATH); + canonicalize_path(new_tablespace_dir); + if (path_is_prefix_of_path(old_cluster_pgdata, new_tablespace_dir)) { /* reproduce warning from CREATE TABLESPACE that is in the log */ pg_log(PG_WARNING, - "\nWARNING: user-defined tablespace locations should not be inside the data directory, i.e. %s", old_tablespace_dir); + "\nWARNING: user-defined tablespace locations should not be inside the data directory, i.e. %s", new_tablespace_dir); /* Unlink file in case it is left over from a previous run. 
*/ unlink(*deletion_script_file_name); @@ -1051,9 +1051,9 @@ create_script_for_old_cluster_deletion(char **deletion_script_file_name) /* delete old cluster's alternate tablespaces */ old_tblspc_suffix = pg_strdup(old_cluster.tablespace_suffix); fix_path_separator(old_tblspc_suffix); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) fprintf(script, RMDIR_CMD " %c%s%s%c\n", PATH_QUOTE, - fix_path_separator(os_info.old_tablespaces[tblnum]), + fix_path_separator(old_cluster.tablespaces[tblnum]), old_tblspc_suffix, PATH_QUOTE); pfree(old_tblspc_suffix); diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index a437067cdca82..c39eb077c2fae 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -443,10 +443,26 @@ get_db_infos(ClusterInfo *cluster) for (tupnum = 0; tupnum < ntups; tupnum++) { + char *spcloc = PQgetvalue(res, tupnum, i_spclocation); + bool inplace = spcloc[0] && !is_absolute_path(spcloc); + dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid)); dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname)); - snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s", - PQgetvalue(res, tupnum, i_spclocation)); + + /* + * The tablespace location might be "", meaning the cluster default + * location, i.e. pg_default or pg_global. For in-place tablespaces, + * pg_tablespace_location() returns a path relative to the data + * directory. + */ + if (inplace) + snprintf(dbinfos[tupnum].db_tablespace, + sizeof(dbinfos[tupnum].db_tablespace), + "%s/%s", cluster->pgdata, spcloc); + else + snprintf(dbinfos[tupnum].db_tablespace, + sizeof(dbinfos[tupnum].db_tablespace), + "%s", spcloc); } PQclear(res); @@ -616,11 +632,21 @@ process_rel_infos(DbInfo *dbinfo, PGresult *res, void *arg) /* Is the tablespace oid non-default? */ if (atooid(PQgetvalue(res, relnum, i_reltablespace)) != 0) { + char *spcloc = PQgetvalue(res, relnum, i_spclocation); + bool inplace = spcloc[0] && !is_absolute_path(spcloc); + /* * The tablespace location might be "", meaning the cluster - * default location, i.e. pg_default or pg_global. + * default location, i.e. pg_default or pg_global. For in-place + * tablespaces, pg_tablespace_location() returns a path relative + * to the data directory. */ - tablespace = PQgetvalue(res, relnum, i_spclocation); + if (inplace) + tablespace = psprintf("%s/%s", + os_info.running_cluster->pgdata, + spcloc); + else + tablespace = spcloc; /* Can we reuse the previous string allocation? */ if (last_tablespace && strcmp(tablespace, last_tablespace) == 0) @@ -630,6 +656,10 @@ process_rel_infos(DbInfo *dbinfo, PGresult *res, void *arg) last_tablespace = curr->tablespace = pg_strdup(tablespace); curr->tblsp_alloc = true; } + + /* Free palloc'd string for in-place tablespaces. */ + if (inplace) + pfree(tablespace); } else /* A zero reltablespace oid indicates the database tablespace. 
*/ diff --git a/src/bin/pg_upgrade/parallel.c b/src/bin/pg_upgrade/parallel.c index 056aa2edaee3f..6d7941844a7c8 100644 --- a/src/bin/pg_upgrade/parallel.c +++ b/src/bin/pg_upgrade/parallel.c @@ -40,6 +40,7 @@ typedef struct char *old_pgdata; char *new_pgdata; char *old_tablespace; + char *new_tablespace; } transfer_thread_arg; static exec_thread_arg **exec_thread_args; @@ -171,7 +172,7 @@ win32_exec_prog(exec_thread_arg *args) void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata, - char *old_tablespace) + char *old_tablespace, char *new_tablespace) { #ifndef WIN32 pid_t child; @@ -181,7 +182,7 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, #endif if (user_opts.jobs <= 1) - transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL); + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL, NULL); else { /* parallel */ @@ -225,7 +226,7 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, if (child == 0) { transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, - old_tablespace); + old_tablespace, new_tablespace); /* if we take another exit path, it will be non-zero */ /* use _exit to skip atexit() functions */ _exit(0); @@ -246,6 +247,7 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, new_arg->new_pgdata = pg_strdup(new_pgdata); pg_free(new_arg->old_tablespace); new_arg->old_tablespace = old_tablespace ? pg_strdup(old_tablespace) : NULL; + new_arg->new_tablespace = new_tablespace ? pg_strdup(new_tablespace) : NULL; child = (HANDLE) _beginthreadex(NULL, 0, (void *) win32_transfer_all_new_dbs, new_arg, 0, NULL); @@ -263,7 +265,8 @@ DWORD win32_transfer_all_new_dbs(transfer_thread_arg *args) { transfer_all_new_dbs(args->old_db_arr, args->new_db_arr, args->old_pgdata, - args->new_pgdata, args->old_tablespace); + args->new_pgdata, args->old_tablespace, + args->new_tablespace); /* terminates thread */ return 0; diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index e9401430e697f..0ef47be0dc199 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -300,6 +300,8 @@ typedef struct uint32 major_version; /* PG_VERSION of cluster */ char major_version_str[64]; /* string PG_VERSION of cluster */ uint32 bin_version; /* version returned from pg_ctl */ + char **tablespaces; /* tablespace directories */ + int num_tablespaces; const char *tablespace_suffix; /* directory specification */ int nsubs; /* number of subscriptions */ bool sub_retain_dead_tuples; /* whether a subscription enables @@ -356,8 +358,6 @@ typedef struct const char *progname; /* complete pathname for this program */ char *user; /* username for clusters */ bool user_specified; /* user specified on command-line */ - char **old_tablespaces; /* tablespaces */ - int num_old_tablespaces; LibraryInfo *libraries; /* loadable libraries */ int num_libraries; ClusterInfo *running_cluster; @@ -457,7 +457,7 @@ void transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata); void transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata, - char *old_tablespace); + char *old_tablespace, char *new_tablespace); /* tablespace.c */ @@ -505,7 +505,7 @@ void parallel_exec_prog(const char *log_file, const char *opt_log_file, const char *fmt,...) 
pg_attribute_printf(3, 4); void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata, - char *old_tablespace); + char *old_tablespace, char *new_tablespace); bool reap_child(bool wait_for_child); /* task.c */ diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index 8d8e816a01fa4..38c17ceabf222 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -17,7 +17,7 @@ #include "common/logging.h" #include "pg_upgrade.h" -static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); +static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace, char *new_tablespace); static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit); /* @@ -136,21 +136,22 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, */ if (user_opts.jobs <= 1) parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, - new_pgdata, NULL); + new_pgdata, NULL, NULL); else { int tblnum; /* transfer default tablespace */ parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, - new_pgdata, old_pgdata); + new_pgdata, old_pgdata, new_pgdata); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum], + new_cluster.tablespaces[tblnum]); /* reap all children */ while (reap_child(true) == true) ; @@ -169,7 +170,8 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, */ void transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, - char *old_pgdata, char *new_pgdata, char *old_tablespace) + char *old_pgdata, char *new_pgdata, + char *old_tablespace, char *new_tablespace) { int old_dbnum, new_dbnum; @@ -204,7 +206,7 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, new_pgdata); if (n_maps) { - transfer_single_new_db(mappings, n_maps, old_tablespace); + transfer_single_new_db(mappings, n_maps, old_tablespace, new_tablespace); } /* We allocate something even for n_maps == 0 */ pg_free(mappings); @@ -234,10 +236,10 @@ transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, * moved_db_dir: Destination for the pg_restore-generated database directory. */ static bool -prepare_for_swap(const char *old_tablespace, Oid db_oid, - char *old_catalog_dir, char *new_db_dir, char *moved_db_dir) +prepare_for_swap(const char *old_tablespace, const char *new_tablespace, + Oid db_oid, char *old_catalog_dir, char *new_db_dir, + char *moved_db_dir) { - const char *new_tablespace; const char *old_tblspc_suffix; const char *new_tblspc_suffix; char old_tblspc[MAXPGPATH]; @@ -247,24 +249,14 @@ prepare_for_swap(const char *old_tablespace, Oid db_oid, struct stat st; if (strcmp(old_tablespace, old_cluster.pgdata) == 0) - { - new_tablespace = new_cluster.pgdata; - new_tblspc_suffix = "/base"; old_tblspc_suffix = "/base"; - } else - { - /* - * XXX: The below line is a hack to deal with the fact that we - * presently don't have an easy way to find the corresponding new - * tablespace's path. This will need to be fixed if/when we add - * pg_upgrade support for in-place tablespaces. 
- */ - new_tablespace = old_tablespace; + old_tblspc_suffix = old_cluster.tablespace_suffix; + if (strcmp(new_tablespace, new_cluster.pgdata) == 0) + new_tblspc_suffix = "/base"; + else new_tblspc_suffix = new_cluster.tablespace_suffix; - old_tblspc_suffix = old_cluster.tablespace_suffix; - } /* Old and new cluster paths. */ snprintf(old_tblspc, sizeof(old_tblspc), "%s%s", old_tablespace, old_tblspc_suffix); @@ -450,7 +442,7 @@ swap_catalog_files(FileNameMap *maps, int size, const char *old_catalog_dir, * during pg_restore. */ static void -do_swap(FileNameMap *maps, int size, char *old_tablespace) +do_swap(FileNameMap *maps, int size, char *old_tablespace, char *new_tablespace) { char old_catalog_dir[MAXPGPATH]; char new_db_dir[MAXPGPATH]; @@ -470,21 +462,23 @@ do_swap(FileNameMap *maps, int size, char *old_tablespace) */ if (old_tablespace) { - if (prepare_for_swap(old_tablespace, maps[0].db_oid, + if (prepare_for_swap(old_tablespace, new_tablespace, maps[0].db_oid, old_catalog_dir, new_db_dir, moved_db_dir)) swap_catalog_files(maps, size, old_catalog_dir, new_db_dir, moved_db_dir); } else { - if (prepare_for_swap(old_cluster.pgdata, maps[0].db_oid, + if (prepare_for_swap(old_cluster.pgdata, new_cluster.pgdata, maps[0].db_oid, old_catalog_dir, new_db_dir, moved_db_dir)) swap_catalog_files(maps, size, old_catalog_dir, new_db_dir, moved_db_dir); - for (int tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (int tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) { - if (prepare_for_swap(os_info.old_tablespaces[tblnum], maps[0].db_oid, + if (prepare_for_swap(old_cluster.tablespaces[tblnum], + new_cluster.tablespaces[tblnum], + maps[0].db_oid, old_catalog_dir, new_db_dir, moved_db_dir)) swap_catalog_files(maps, size, old_catalog_dir, new_db_dir, moved_db_dir); @@ -498,7 +492,8 @@ do_swap(FileNameMap *maps, int size, char *old_tablespace) * create links for mappings stored in "maps" array. */ static void -transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) +transfer_single_new_db(FileNameMap *maps, int size, + char *old_tablespace, char *new_tablespace) { int mapnum; bool vm_must_add_frozenbit = false; @@ -520,7 +515,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) */ Assert(!vm_must_add_frozenbit); - do_swap(maps, size, old_tablespace); + do_swap(maps, size, old_tablespace, new_tablespace); return; } diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl index 58fe8a8c7dcea..348f402146234 100644 --- a/src/bin/pg_upgrade/t/006_transfer_modes.pl +++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl @@ -38,6 +38,13 @@ sub test_mode } $new->init(); + # allow_in_place_tablespaces is available as far back as v10. + if ($old->pg_version >= 10) + { + $new->append_conf('postgresql.conf', "allow_in_place_tablespaces = true"); + $old->append_conf('postgresql.conf', "allow_in_place_tablespaces = true"); + } + # Create a small variety of simple test objects on the old cluster. We'll # check that these reach the new version after upgrading. $old->start; @@ -49,8 +56,7 @@ sub test_mode $old->safe_psql('testdb1', "VACUUM FULL test2"); $old->safe_psql('testdb1', "CREATE SEQUENCE testseq START 5432"); - # For cross-version tests, we can also check that pg_upgrade handles - # tablespaces. + # If an old installation is provided, we can test non-in-place tablespaces. 
if (defined($ENV{oldinstall})) { my $tblspc = PostgreSQL::Test::Utils::tempdir_short(); @@ -64,6 +70,19 @@ sub test_mode $old->safe_psql('testdb2', "CREATE TABLE test4 AS SELECT generate_series(400, 502)"); } + + # If the old cluster is >= v10, we can test in-place tablespaces. + if ($old->pg_version >= 10) + { + $old->safe_psql('postgres', + "CREATE TABLESPACE inplc_tblspc LOCATION ''"); + $old->safe_psql('postgres', + "CREATE DATABASE testdb3 TABLESPACE inplc_tblspc"); + $old->safe_psql('postgres', + "CREATE TABLE test5 TABLESPACE inplc_tblspc AS SELECT generate_series(503, 606)"); + $old->safe_psql('testdb3', + "CREATE TABLE test6 AS SELECT generate_series(607, 711)"); + } $old->stop; my $result = command_ok_or_fails_like( @@ -94,8 +113,7 @@ sub test_mode $result = $new->safe_psql('testdb1', "SELECT nextval('testseq')"); is($result, '5432', "sequence data after pg_upgrade $mode"); - # For cross-version tests, we should have some objects in a non-default - # tablespace. + # Tests for non-in-place tablespaces. if (defined($ENV{oldinstall})) { $result = @@ -105,6 +123,15 @@ sub test_mode $new->safe_psql('testdb2', "SELECT COUNT(*) FROM test4"); is($result, '103', "test4 data after pg_upgrade $mode"); } + + # Tests for in-place tablespaces. + if ($old->pg_version >= 10) + { + $result = $new->safe_psql('postgres', "SELECT COUNT(*) FROM test5"); + is($result, '104', "test5 data after pg_upgrade $mode"); + $result = $new->safe_psql('testdb3', "SELECT COUNT(*) FROM test6"); + is($result, '105', "test6 data after pg_upgrade $mode"); + } $new->stop; } diff --git a/src/bin/pg_upgrade/tablespace.c b/src/bin/pg_upgrade/tablespace.c index 3520a75ba317d..151d74e17349b 100644 --- a/src/bin/pg_upgrade/tablespace.c +++ b/src/bin/pg_upgrade/tablespace.c @@ -23,10 +23,20 @@ init_tablespaces(void) set_tablespace_directory_suffix(&old_cluster); set_tablespace_directory_suffix(&new_cluster); - if (os_info.num_old_tablespaces > 0 && + if (old_cluster.num_tablespaces > 0 && strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0) - pg_fatal("Cannot upgrade to/from the same system catalog version when\n" - "using tablespaces."); + { + for (int i = 0; i < old_cluster.num_tablespaces; i++) + { + /* + * In-place tablespaces are okay for same-version upgrades because + * their paths will differ between clusters. 
+ */ + if (strcmp(old_cluster.tablespaces[i], new_cluster.tablespaces[i]) == 0) + pg_fatal("Cannot upgrade to/from the same system catalog version when\n" + "using tablespaces."); + } + } } @@ -53,19 +63,48 @@ get_tablespace_paths(void) res = executeQueryOrDie(conn, "%s", query); - if ((os_info.num_old_tablespaces = PQntuples(res)) != 0) - os_info.old_tablespaces = - (char **) pg_malloc(os_info.num_old_tablespaces * sizeof(char *)); + old_cluster.num_tablespaces = PQntuples(res); + new_cluster.num_tablespaces = PQntuples(res); + + if (PQntuples(res) != 0) + { + old_cluster.tablespaces = + (char **) pg_malloc(old_cluster.num_tablespaces * sizeof(char *)); + new_cluster.tablespaces = + (char **) pg_malloc(new_cluster.num_tablespaces * sizeof(char *)); + } else - os_info.old_tablespaces = NULL; + { + old_cluster.tablespaces = NULL; + new_cluster.tablespaces = NULL; + } i_spclocation = PQfnumber(res, "spclocation"); - for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++) + for (tblnum = 0; tblnum < old_cluster.num_tablespaces; tblnum++) { struct stat statBuf; + char *spcloc = PQgetvalue(res, tblnum, i_spclocation); - os_info.old_tablespaces[tblnum] = pg_strdup(PQgetvalue(res, tblnum, i_spclocation)); + /* + * For now, we do not expect non-in-place tablespaces to move during + * upgrade. If that changes, it will likely become necessary to run + * the above query on the new cluster, too. + * + * pg_tablespace_location() returns absolute paths for non-in-place + * tablespaces and relative paths for in-place ones, so we use + * is_absolute_path() to distinguish between them. + */ + if (is_absolute_path(PQgetvalue(res, tblnum, i_spclocation))) + { + old_cluster.tablespaces[tblnum] = pg_strdup(spcloc); + new_cluster.tablespaces[tblnum] = old_cluster.tablespaces[tblnum]; + } + else + { + old_cluster.tablespaces[tblnum] = psprintf("%s/%s", old_cluster.pgdata, spcloc); + new_cluster.tablespaces[tblnum] = psprintf("%s/%s", new_cluster.pgdata, spcloc); + } /* * Check that the tablespace path exists and is a directory. @@ -76,21 +115,21 @@ get_tablespace_paths(void) * that contains user tablespaces is moved as part of pg_upgrade * preparation and the symbolic links are not updated. */ - if (stat(os_info.old_tablespaces[tblnum], &statBuf) != 0) + if (stat(old_cluster.tablespaces[tblnum], &statBuf) != 0) { if (errno == ENOENT) report_status(PG_FATAL, "tablespace directory \"%s\" does not exist", - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum]); else report_status(PG_FATAL, "could not stat tablespace directory \"%s\": %m", - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum]); } if (!S_ISDIR(statBuf.st_mode)) report_status(PG_FATAL, "tablespace path \"%s\" is not a directory", - os_info.old_tablespaces[tblnum]); + old_cluster.tablespaces[tblnum]); } PQclear(res); From ee924698d566223e927cf9d505c1ccdacd7061c8 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 30 Jul 2025 13:04:47 -0500 Subject: [PATCH 42/67] doc: Adjust documentation for vacuumdb --missing-stats-only. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sentence in question gave readers the impression that vacuumdb removes statistics for a period of time while analyzing, but it's actually meant to convey that --analyze-in-stages temporarily replaces existing statistics with ones generated with lower statistics targets. Reported-by: Frédéric Yhuel Reviewed-by: Frédéric Yhuel Reviewed-by: "David G. 
Johnston" Reviewed-by: Corey Huinker Reviewed-by: Jeff Davis Discussion: https://postgr.es/m/4b94ca16-7a6d-4581-b2aa-4ea79dbc082a%40dalibo.com Backpatch-through: 18 --- doc/src/sgml/ref/vacuumdb.sgml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index b0680a61814cc..c7d9dca17b867 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -282,9 +282,11 @@ PostgreSQL documentation Only analyze relations that are missing statistics for a column, index - expression, or extended statistics object. This option prevents - vacuumdb from deleting existing statistics - so that the query optimizer's choices do not become transiently worse. + expression, or extended statistics object. When used with + , this option prevents + vacuumdb from temporarily replacing existing + statistics with ones generated with lower statistics targets, thus + avoiding transiently worse query optimizer choices. This option can only be used in conjunction with From e125e360020a7b0affd5bea938b749e85d8999d3 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 31 Jul 2025 10:06:34 +0900 Subject: [PATCH 43/67] Rename CachedPlanType to PlannedStmtOrigin for PlannedStmt Commit 719dcf3c42 introduced a field called CachedPlanType in PlannedStmt to allow extensions to determine whether a cached plan is generic or custom. After discussion, the concepts that we want to track are a bit wider than initially anticipated, as it is closer to knowing from which "source" or "origin" a PlannedStmt has been generated or retrieved. Custom and generic cached plans are a subset of that. Based on the state of HEAD, we have been able to define two more origins: - "standard", for the case where PlannedStmt is generated in standard_planner(), the most common case. - "internal", for the fake PlannedStmt generated internally by some query patterns. This could be tuned in the future depending on what is needed. This looks like a good starting point, at least. The default value is called "UNKNOWN", provided as fallback value. This value is not used in the core code, the idea is to let extensions building their own PlannedStmts know about this new field. 
Author: Michael Paquier Co-authored-by: Sami Imseih Discussion: https://postgr.es/m/aILaHupXbIGgF2wJ@paquier.xyz --- src/backend/commands/foreigncmds.c | 2 +- src/backend/commands/schemacmds.c | 2 +- src/backend/executor/execParallel.c | 2 +- src/backend/optimizer/plan/planner.c | 2 +- src/backend/tcop/postgres.c | 2 +- src/backend/tcop/utility.c | 4 ++-- src/backend/utils/cache/plancache.c | 2 +- src/include/nodes/plannodes.h | 21 +++++++++++---------- src/tools/pgindent/typedefs.list | 2 +- 9 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c index fcd5fcd8915e3..77f8461f42eee 100644 --- a/src/backend/commands/foreigncmds.c +++ b/src/backend/commands/foreigncmds.c @@ -1588,7 +1588,7 @@ ImportForeignSchema(ImportForeignSchemaStmt *stmt) pstmt->utilityStmt = (Node *) cstmt; pstmt->stmt_location = rs->stmt_location; pstmt->stmt_len = rs->stmt_len; - pstmt->cached_plan_type = PLAN_CACHE_NONE; + pstmt->planOrigin = PLAN_STMT_INTERNAL; /* Execute statement */ ProcessUtility(pstmt, cmd, false, diff --git a/src/backend/commands/schemacmds.c b/src/backend/commands/schemacmds.c index c00f1a11384f1..0f03d9743d203 100644 --- a/src/backend/commands/schemacmds.c +++ b/src/backend/commands/schemacmds.c @@ -215,7 +215,7 @@ CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString, wrapper->utilityStmt = stmt; wrapper->stmt_location = stmt_location; wrapper->stmt_len = stmt_len; - wrapper->cached_plan_type = PLAN_CACHE_NONE; + wrapper->planOrigin = PLAN_STMT_INTERNAL; /* do this step */ ProcessUtility(wrapper, diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index fc76f22fb8238..f098a5557cf07 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -189,7 +189,7 @@ ExecSerializePlan(Plan *plan, EState *estate) pstmt->permInfos = estate->es_rteperminfos; pstmt->resultRelations = NIL; pstmt->appendRelations = NIL; - pstmt->cached_plan_type = PLAN_CACHE_NONE; + pstmt->planOrigin = PLAN_STMT_INTERNAL; /* * Transfer only parallel-safe subplans, leaving a NULL "hole" in the list diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a77b2147e9592..d59d6e4c6a021 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -558,6 +558,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->commandType = parse->commandType; result->queryId = parse->queryId; + result->planOrigin = PLAN_STMT_STANDARD; result->hasReturning = (parse->returningList != NIL); result->hasModifyingCTE = parse->hasModifyingCTE; result->canSetTag = parse->canSetTag; @@ -582,7 +583,6 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->utilityStmt = parse->utilityStmt; result->stmt_location = parse->stmt_location; result->stmt_len = parse->stmt_len; - result->cached_plan_type = PLAN_CACHE_NONE; result->jitFlags = PGJIT_NONE; if (jit_enabled && jit_above_cost >= 0 && diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index a297606cdd7fa..0cecd4649020f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -988,7 +988,7 @@ pg_plan_queries(List *querytrees, const char *query_string, int cursorOptions, stmt->stmt_location = query->stmt_location; stmt->stmt_len = query->stmt_len; stmt->queryId = query->queryId; - stmt->cached_plan_type = PLAN_CACHE_NONE; + stmt->planOrigin = PLAN_STMT_INTERNAL; } 
else { diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index babc34d0cbe1d..4f4191b0ea6b4 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1234,7 +1234,7 @@ ProcessUtilitySlow(ParseState *pstate, wrapper->utilityStmt = stmt; wrapper->stmt_location = pstmt->stmt_location; wrapper->stmt_len = pstmt->stmt_len; - wrapper->cached_plan_type = PLAN_CACHE_NONE; + wrapper->planOrigin = PLAN_STMT_INTERNAL; ProcessUtility(wrapper, queryString, @@ -1965,7 +1965,7 @@ ProcessUtilityForAlterTable(Node *stmt, AlterTableUtilityContext *context) wrapper->utilityStmt = stmt; wrapper->stmt_location = context->pstmt->stmt_location; wrapper->stmt_len = context->pstmt->stmt_len; - wrapper->cached_plan_type = PLAN_CACHE_NONE; + wrapper->planOrigin = PLAN_STMT_INTERNAL; ProcessUtility(wrapper, context->queryString, diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index f4d2b9458a5ea..0c506d320b137 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1390,7 +1390,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, { PlannedStmt *pstmt = (PlannedStmt *) lfirst(lc); - pstmt->cached_plan_type = customplan ? PLAN_CACHE_CUSTOM : PLAN_CACHE_GENERIC; + pstmt->planOrigin = customplan ? PLAN_STMT_CACHE_CUSTOM : PLAN_STMT_CACHE_GENERIC; } return plan; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 6d8e1e99db3bd..29d7732d6a031 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -29,18 +29,19 @@ */ /* ---------------- - * CachedPlanType + * PlannedStmtOrigin * - * CachedPlanType identifies whether a PlannedStmt is a cached plan, and if - * so, whether it is generic or custom. + * PlannedStmtOrigin identifies where a PlannedStmt comes from. * ---------------- */ -typedef enum CachedPlanType +typedef enum PlannedStmtOrigin { - PLAN_CACHE_NONE = 0, /* Not a cached plan */ - PLAN_CACHE_GENERIC, /* Generic cached plan */ - PLAN_CACHE_CUSTOM, /* Custom cached plan */ -} CachedPlanType; + PLAN_STMT_UNKNOWN = 0, /* plan origin is not yet known */ + PLAN_STMT_INTERNAL, /* generated internally by a query */ + PLAN_STMT_STANDARD, /* standard planned statement */ + PLAN_STMT_CACHE_GENERIC, /* Generic cached plan */ + PLAN_STMT_CACHE_CUSTOM, /* Custom cached plan */ +} PlannedStmtOrigin; /* ---------------- * PlannedStmt node @@ -72,8 +73,8 @@ typedef struct PlannedStmt /* plan identifier (can be set by plugins) */ int64 planId; - /* type of cached plan */ - CachedPlanType cached_plan_type; + /* origin of plan */ + PlannedStmtOrigin planOrigin; /* is it insert|update|delete|merge RETURNING?
*/ bool hasReturning; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 3daba26b23723..e6f2e93b2d6fa 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -391,7 +391,6 @@ CachedFunctionHashEntry CachedFunctionHashKey CachedPlan CachedPlanSource -CachedPlanType CallContext CallStmt CancelRequestPacket @@ -2276,6 +2275,7 @@ PlanInvalItem PlanRowMark PlanState PlannedStmt +PlannedStmtOrigin PlannerGlobal PlannerInfo PlannerParamItem From 3357471cf9f5e470dfed0c7919bcf31c7efaf2b9 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 31 Jul 2025 11:20:29 +0900 Subject: [PATCH 44/67] pg_stat_statements: Add counters for generic and custom plans This patch adds two new counters to pg_stat_statements: - generic_plan_calls - custom_plan_calls These counters track how many times a prepared statement was executed using a generic or custom plan, respectively. They provide a global, query-level equivalent, covering both top and non-top levels, of pg_prepared_statements, whose data is restricted to a single session. This commit builds upon e125e360020a. The module is bumped to version 1.13. PGSS_FILE_HEADER is bumped as well, something that the latest patches touching the on-disk format of the PGSS file had not actually bothered with since 2022. Author: Sami Imseih Reviewed-by: Ilia Evdokimov Reviewed-by: Andrei Lepikhov Reviewed-by: Michael Paquier Reviewed-by: Nikolay Samokhvalov Discussion: https://postgr.es/m/CAA5RZ0uFw8Y9GCFvafhC=OA8NnMqVZyzXPfv_EePOt+iv1T-qQ@mail.gmail.com --- contrib/pg_stat_statements/Makefile | 3 +- .../expected/oldextversions.out | 67 ++++++ .../pg_stat_statements/expected/plancache.out | 224 ++++++++++++++++++ contrib/pg_stat_statements/meson.build | 2 + .../pg_stat_statements--1.12--1.13.sql | 78 ++++++ .../pg_stat_statements/pg_stat_statements.c | 53 ++++- .../pg_stat_statements.control | 2 +- .../pg_stat_statements/sql/oldextversions.sql | 5 + contrib/pg_stat_statements/sql/plancache.sql | 94 ++++++++ doc/src/sgml/pgstatstatements.sgml | 18 ++ 10 files changed, 536 insertions(+), 10 deletions(-) create mode 100644 contrib/pg_stat_statements/expected/plancache.out create mode 100644 contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql create mode 100644 contrib/pg_stat_statements/sql/plancache.sql diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index b2bd8794d2a14..fe0478ac55266 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -7,6 +7,7 @@ OBJS = \ EXTENSION = pg_stat_statements DATA = pg_stat_statements--1.4.sql \ + pg_stat_statements--1.12--1.13.sql \ pg_stat_statements--1.11--1.12.sql pg_stat_statements--1.10--1.11.sql \ pg_stat_statements--1.9--1.10.sql pg_stat_statements--1.8--1.9.sql \ pg_stat_statements--1.7--1.8.sql pg_stat_statements--1.6--1.7.sql \ @@ -20,7 +21,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf REGRESS = select dml cursors utility level_tracking planning \ user_activity wal entry_timestamp privileges extended \ parallel plancache cleanup oldextversions squashing # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients).
NO_INSTALLCHECK = 1 diff --git a/contrib/pg_stat_statements/expected/oldextversions.out b/contrib/pg_stat_statements/expected/oldextversions.out index de679b19711ab..726383a99d7c1 100644 --- a/contrib/pg_stat_statements/expected/oldextversions.out +++ b/contrib/pg_stat_statements/expected/oldextversions.out @@ -407,4 +407,71 @@ SELECT count(*) > 0 AS has_data FROM pg_stat_statements; t (1 row) +-- New functions and views for pg_stat_statements in 1.13 +AlTER EXTENSION pg_stat_statements UPDATE TO '1.13'; +\d pg_stat_statements + View "public.pg_stat_statements" + Column | Type | Collation | Nullable | Default +----------------------------+--------------------------+-----------+----------+--------- + userid | oid | | | + dbid | oid | | | + toplevel | boolean | | | + queryid | bigint | | | + query | text | | | + plans | bigint | | | + total_plan_time | double precision | | | + min_plan_time | double precision | | | + max_plan_time | double precision | | | + mean_plan_time | double precision | | | + stddev_plan_time | double precision | | | + calls | bigint | | | + total_exec_time | double precision | | | + min_exec_time | double precision | | | + max_exec_time | double precision | | | + mean_exec_time | double precision | | | + stddev_exec_time | double precision | | | + rows | bigint | | | + shared_blks_hit | bigint | | | + shared_blks_read | bigint | | | + shared_blks_dirtied | bigint | | | + shared_blks_written | bigint | | | + local_blks_hit | bigint | | | + local_blks_read | bigint | | | + local_blks_dirtied | bigint | | | + local_blks_written | bigint | | | + temp_blks_read | bigint | | | + temp_blks_written | bigint | | | + shared_blk_read_time | double precision | | | + shared_blk_write_time | double precision | | | + local_blk_read_time | double precision | | | + local_blk_write_time | double precision | | | + temp_blk_read_time | double precision | | | + temp_blk_write_time | double precision | | | + wal_records | bigint | | | + wal_fpi | bigint | | | + wal_bytes | numeric | | | + wal_buffers_full | bigint | | | + jit_functions | bigint | | | + jit_generation_time | double precision | | | + jit_inlining_count | bigint | | | + jit_inlining_time | double precision | | | + jit_optimization_count | bigint | | | + jit_optimization_time | double precision | | | + jit_emission_count | bigint | | | + jit_emission_time | double precision | | | + jit_deform_count | bigint | | | + jit_deform_time | double precision | | | + parallel_workers_to_launch | bigint | | | + parallel_workers_launched | bigint | | | + generic_plan_calls | bigint | | | + custom_plan_calls | bigint | | | + stats_since | timestamp with time zone | | | + minmax_stats_since | timestamp with time zone | | | + +SELECT count(*) > 0 AS has_data FROM pg_stat_statements; + has_data +---------- + t +(1 row) + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/expected/plancache.out b/contrib/pg_stat_statements/expected/plancache.out new file mode 100644 index 0000000000000..e152de9f55130 --- /dev/null +++ b/contrib/pg_stat_statements/expected/plancache.out @@ -0,0 +1,224 @@ +-- +-- Tests with plan cache +-- +-- Setup +CREATE OR REPLACE FUNCTION select_one_func(int) RETURNS VOID AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; +CREATE OR REPLACE PROCEDURE select_one_proc(int) AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; +-- Prepared statements +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +PREPARE p1 AS 
SELECT $1 AS a; +SET plan_cache_mode TO force_generic_plan; +EXECUTE p1(1); + a +--- + 1 +(1 row) + +SET plan_cache_mode TO force_custom_plan; +EXECUTE p1(1); + a +--- + 1 +(1 row) + +SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | query +-------+--------------------+-------------------+---------------------------------------------------- + 2 | 1 | 1 | PREPARE p1 AS SELECT $1 AS a + 1 | 0 | 0 | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | SET plan_cache_mode TO $1 +(3 rows) + +DEALLOCATE p1; +-- Extended query protocol +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT $1 AS a \parse p1 +SET plan_cache_mode TO force_generic_plan; +\bind_named p1 1 +; + a +--- + 1 +(1 row) + +SET plan_cache_mode TO force_custom_plan; +\bind_named p1 1 +; + a +--- + 1 +(1 row) + +SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | query +-------+--------------------+-------------------+---------------------------------------------------- + 2 | 1 | 1 | SELECT $1 AS a + 1 | 0 | 0 | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | SET plan_cache_mode TO $1 +(3 rows) + +\close_prepared p1 +-- EXPLAIN [ANALYZE] EXECUTE +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +PREPARE p1 AS SELECT $1; +SET plan_cache_mode TO force_generic_plan; +EXPLAIN (COSTS OFF) EXECUTE p1(1); + QUERY PLAN +------------ + Result +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +SET plan_cache_mode TO force_custom_plan; +EXPLAIN (COSTS OFF) EXECUTE p1(1); + QUERY PLAN +------------ + Result +(1 row) + +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | toplevel | query +-------+--------------------+-------------------+----------+---------------------------------------------------------------------------------- + 2 | 0 | 0 | t | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1) + 2 | 0 | 0 | t | EXPLAIN (COSTS OFF) EXECUTE p1(1) + 4 | 2 | 2 | f | PREPARE p1 AS SELECT $1 + 1 | 0 | 0 | t | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | t | SET plan_cache_mode TO $1 +(5 rows) + +RESET pg_stat_statements.track; +DEALLOCATE p1; +-- Functions/procedures +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET plan_cache_mode TO force_generic_plan; +SELECT select_one_func(1); + select_one_func +----------------- + +(1 row) + +CALL select_one_proc(1); +SET plan_cache_mode TO force_custom_plan; +SELECT select_one_func(1); + select_one_func +----------------- + +(1 row) + +CALL select_one_proc(1); +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C"; + calls | generic_plan_calls | custom_plan_calls | toplevel | query 
+-------+--------------------+-------------------+----------+---------------------------------------------------- + 2 | 0 | 0 | t | CALL select_one_proc($1) + 4 | 2 | 2 | f | SELECT $1 + 1 | 0 | 0 | t | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | t | SELECT select_one_func($1) + 2 | 0 | 0 | t | SET plan_cache_mode TO $1 +(5 rows) + +-- +-- EXPLAIN [ANALYZE] EXECUTE + functions/procedures +-- +SET pg_stat_statements.track = 'all'; +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SET plan_cache_mode TO force_generic_plan; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +EXPLAIN (COSTS OFF) SELECT select_one_func(1); + QUERY PLAN +------------ + Result +(1 row) + +CALL select_one_proc(1); +SET plan_cache_mode TO force_custom_plan; +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1); + QUERY PLAN +----------------------------------- + Result (actual rows=1.00 loops=1) +(1 row) + +EXPLAIN (COSTS OFF) SELECT select_one_func(1); + QUERY PLAN +------------ + Result +(1 row) + +CALL select_one_proc(1); +SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements + ORDER BY query COLLATE "C", toplevel; + calls | generic_plan_calls | custom_plan_calls | toplevel | query +-------+--------------------+-------------------+----------+------------------------------------------------------------------------------------------------ + 2 | 0 | 0 | t | CALL select_one_proc($1) + 2 | 0 | 0 | t | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func($1) + 4 | 0 | 0 | f | EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func($1); + 2 | 0 | 0 | t | EXPLAIN (COSTS OFF) SELECT select_one_func($1) + 4 | 2 | 2 | f | SELECT $1 + 1 | 0 | 0 | t | SELECT pg_stat_statements_reset() IS NOT NULL AS t + 2 | 0 | 0 | t | SET plan_cache_mode TO $1 +(7 rows) + +RESET pg_stat_statements.track; +-- +-- Cleanup +-- +DROP FUNCTION select_one_func(int); +DROP PROCEDURE select_one_proc(int); diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build index 01a6cbdcf6139..7b8bfbb1de78c 100644 --- a/contrib/pg_stat_statements/meson.build +++ b/contrib/pg_stat_statements/meson.build @@ -21,6 +21,7 @@ contrib_targets += pg_stat_statements install_data( 'pg_stat_statements.control', 'pg_stat_statements--1.4.sql', + 'pg_stat_statements--1.12--1.13.sql', 'pg_stat_statements--1.11--1.12.sql', 'pg_stat_statements--1.10--1.11.sql', 'pg_stat_statements--1.9--1.10.sql', @@ -54,6 +55,7 @@ tests += { 'privileges', 'extended', 'parallel', + 'plancache', 'cleanup', 'oldextversions', 'squashing', diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql new file mode 100644 index 0000000000000..2f0eaf14ec34d --- /dev/null +++ b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql @@ -0,0 +1,78 @@ +/* contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_stat_statements UPDATE TO '1.13'" to load this file. 
\quit + +/* First we have to remove them from the extension */ +ALTER EXTENSION pg_stat_statements DROP VIEW pg_stat_statements; +ALTER EXTENSION pg_stat_statements DROP FUNCTION pg_stat_statements(boolean); + +/* Then we can drop them */ +DROP VIEW pg_stat_statements; +DROP FUNCTION pg_stat_statements(boolean); + +/* Now redefine */ +CREATE FUNCTION pg_stat_statements(IN showtext boolean, + OUT userid oid, + OUT dbid oid, + OUT toplevel bool, + OUT queryid bigint, + OUT query text, + OUT plans int8, + OUT total_plan_time float8, + OUT min_plan_time float8, + OUT max_plan_time float8, + OUT mean_plan_time float8, + OUT stddev_plan_time float8, + OUT calls int8, + OUT total_exec_time float8, + OUT min_exec_time float8, + OUT max_exec_time float8, + OUT mean_exec_time float8, + OUT stddev_exec_time float8, + OUT rows int8, + OUT shared_blks_hit int8, + OUT shared_blks_read int8, + OUT shared_blks_dirtied int8, + OUT shared_blks_written int8, + OUT local_blks_hit int8, + OUT local_blks_read int8, + OUT local_blks_dirtied int8, + OUT local_blks_written int8, + OUT temp_blks_read int8, + OUT temp_blks_written int8, + OUT shared_blk_read_time float8, + OUT shared_blk_write_time float8, + OUT local_blk_read_time float8, + OUT local_blk_write_time float8, + OUT temp_blk_read_time float8, + OUT temp_blk_write_time float8, + OUT wal_records int8, + OUT wal_fpi int8, + OUT wal_bytes numeric, + OUT wal_buffers_full int8, + OUT jit_functions int8, + OUT jit_generation_time float8, + OUT jit_inlining_count int8, + OUT jit_inlining_time float8, + OUT jit_optimization_count int8, + OUT jit_optimization_time float8, + OUT jit_emission_count int8, + OUT jit_emission_time float8, + OUT jit_deform_count int8, + OUT jit_deform_time float8, + OUT parallel_workers_to_launch int8, + OUT parallel_workers_launched int8, + OUT generic_plan_calls int8, + OUT custom_plan_calls int8, + OUT stats_since timestamp with time zone, + OUT minmax_stats_since timestamp with time zone +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_stat_statements_1_13' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE VIEW pg_stat_statements AS + SELECT * FROM pg_stat_statements(true); + +GRANT SELECT ON pg_stat_statements TO PUBLIC; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index e7857f81ec057..9fc9635d3300d 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -85,7 +85,7 @@ PG_MODULE_MAGIC_EXT( #define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat" /* Magic number identifying the stats file format */ -static const uint32 PGSS_FILE_HEADER = 0x20220408; +static const uint32 PGSS_FILE_HEADER = 0x20250731; /* PostgreSQL major version number, changes in which invalidate all entries */ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100; @@ -114,6 +114,7 @@ typedef enum pgssVersion PGSS_V1_10, PGSS_V1_11, PGSS_V1_12, + PGSS_V1_13, } pgssVersion; typedef enum pgssStoreKind @@ -210,6 +211,8 @@ typedef struct Counters * to be launched */ int64 parallel_workers_launched; /* # of parallel workers actually * launched */ + int64 generic_plan_calls; /* number of calls using a generic plan */ + int64 custom_plan_calls; /* number of calls using a custom plan */ } Counters; /* @@ -323,6 +326,7 @@ PG_FUNCTION_INFO_V1(pg_stat_statements_1_9); PG_FUNCTION_INFO_V1(pg_stat_statements_1_10); PG_FUNCTION_INFO_V1(pg_stat_statements_1_11); PG_FUNCTION_INFO_V1(pg_stat_statements_1_12); 
+PG_FUNCTION_INFO_V1(pg_stat_statements_1_13); PG_FUNCTION_INFO_V1(pg_stat_statements); PG_FUNCTION_INFO_V1(pg_stat_statements_info); @@ -355,7 +359,8 @@ static void pgss_store(const char *query, int64 queryId, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, - int parallel_workers_launched); + int parallel_workers_launched, + PlannedStmtOrigin planOrigin); static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext); @@ -877,7 +882,8 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) NULL, jstate, 0, - 0); + 0, + PLAN_STMT_UNKNOWN); } /* @@ -957,7 +963,8 @@ pgss_planner(Query *parse, NULL, NULL, 0, - 0); + 0, + result->planOrigin); } else { @@ -1091,7 +1098,8 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL, NULL, queryDesc->estate->es_parallel_workers_to_launch, - queryDesc->estate->es_parallel_workers_launched); + queryDesc->estate->es_parallel_workers_launched, + queryDesc->plannedstmt->planOrigin); } if (prev_ExecutorEnd) @@ -1224,7 +1232,8 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, NULL, NULL, 0, - 0); + 0, + pstmt->planOrigin); } else { @@ -1287,7 +1296,8 @@ pgss_store(const char *query, int64 queryId, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, - int parallel_workers_launched) + int parallel_workers_launched, + PlannedStmtOrigin planOrigin) { pgssHashKey key; pgssEntry *entry; @@ -1495,6 +1505,12 @@ pgss_store(const char *query, int64 queryId, entry->counters.parallel_workers_to_launch += parallel_workers_to_launch; entry->counters.parallel_workers_launched += parallel_workers_launched; + /* plan cache counters */ + if (planOrigin == PLAN_STMT_CACHE_GENERIC) + entry->counters.generic_plan_calls++; + else if (planOrigin == PLAN_STMT_CACHE_CUSTOM) + entry->counters.custom_plan_calls++; + SpinLockRelease(&entry->mutex); } @@ -1562,7 +1578,8 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS) #define PG_STAT_STATEMENTS_COLS_V1_10 43 #define PG_STAT_STATEMENTS_COLS_V1_11 49 #define PG_STAT_STATEMENTS_COLS_V1_12 52 -#define PG_STAT_STATEMENTS_COLS 52 /* maximum of above */ +#define PG_STAT_STATEMENTS_COLS_V1_13 54 +#define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */ /* * Retrieve statement statistics. @@ -1574,6 +1591,16 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS) * expected API version is identified by embedding it in the C name of the * function. Unfortunately we weren't bright enough to do that for 1.1. 
*/ +Datum +pg_stat_statements_1_13(PG_FUNCTION_ARGS) +{ + bool showtext = PG_GETARG_BOOL(0); + + pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext); + + return (Datum) 0; +} + Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS) { @@ -1732,6 +1759,10 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, if (api_version != PGSS_V1_12) elog(ERROR, "incorrect number of output arguments"); break; + case PG_STAT_STATEMENTS_COLS_V1_13: + if (api_version != PGSS_V1_13) + elog(ERROR, "incorrect number of output arguments"); + break; default: elog(ERROR, "incorrect number of output arguments"); } @@ -1984,6 +2015,11 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, values[i++] = Int64GetDatumFast(tmp.parallel_workers_to_launch); values[i++] = Int64GetDatumFast(tmp.parallel_workers_launched); } + if (api_version >= PGSS_V1_13) + { + values[i++] = Int64GetDatumFast(tmp.generic_plan_calls); + values[i++] = Int64GetDatumFast(tmp.custom_plan_calls); + } if (api_version >= PGSS_V1_11) { values[i++] = TimestampTzGetDatum(stats_since); @@ -1999,6 +2035,7 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 : api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 : api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 : + api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 : -1 /* fail if you forget to update this assert */ )); tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); diff --git a/contrib/pg_stat_statements/pg_stat_statements.control b/contrib/pg_stat_statements/pg_stat_statements.control index d45ebc12e3605..2eee0ceffa894 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.control +++ b/contrib/pg_stat_statements/pg_stat_statements.control @@ -1,5 +1,5 @@ # pg_stat_statements extension comment = 'track planning and execution statistics of all SQL statements executed' -default_version = '1.12' +default_version = '1.13' module_pathname = '$libdir/pg_stat_statements' relocatable = true diff --git a/contrib/pg_stat_statements/sql/oldextversions.sql b/contrib/pg_stat_statements/sql/oldextversions.sql index 13b8ca28586d1..e416efe9ffbee 100644 --- a/contrib/pg_stat_statements/sql/oldextversions.sql +++ b/contrib/pg_stat_statements/sql/oldextversions.sql @@ -63,4 +63,9 @@ AlTER EXTENSION pg_stat_statements UPDATE TO '1.12'; \d pg_stat_statements SELECT count(*) > 0 AS has_data FROM pg_stat_statements; +-- New functions and views for pg_stat_statements in 1.13 +AlTER EXTENSION pg_stat_statements UPDATE TO '1.13'; +\d pg_stat_statements +SELECT count(*) > 0 AS has_data FROM pg_stat_statements; + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/sql/plancache.sql b/contrib/pg_stat_statements/sql/plancache.sql new file mode 100644 index 0000000000000..160ced7add368 --- /dev/null +++ b/contrib/pg_stat_statements/sql/plancache.sql @@ -0,0 +1,94 @@ +-- +-- Tests with plan cache +-- + +-- Setup +CREATE OR REPLACE FUNCTION select_one_func(int) RETURNS VOID AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; +CREATE OR REPLACE PROCEDURE select_one_proc(int) AS $$ +DECLARE + ret INT; +BEGIN + SELECT $1 INTO ret; +END; +$$ LANGUAGE plpgsql; + +-- Prepared statements +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +PREPARE p1 AS SELECT $1 AS a; +SET plan_cache_mode TO force_generic_plan; +EXECUTE p1(1); +SET plan_cache_mode TO force_custom_plan; +EXECUTE p1(1); +SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements + 
ORDER BY query COLLATE "C";
+DEALLOCATE p1;
+
+-- Extended query protocol
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT $1 AS a \parse p1
+SET plan_cache_mode TO force_generic_plan;
+\bind_named p1 1
+;
+SET plan_cache_mode TO force_custom_plan;
+\bind_named p1 1
+;
+SELECT calls, generic_plan_calls, custom_plan_calls, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C";
+\close_prepared p1
+
+-- EXPLAIN [ANALYZE] EXECUTE
+SET pg_stat_statements.track = 'all';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+PREPARE p1 AS SELECT $1;
+SET plan_cache_mode TO force_generic_plan;
+EXPLAIN (COSTS OFF) EXECUTE p1(1);
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1);
+SET plan_cache_mode TO force_custom_plan;
+EXPLAIN (COSTS OFF) EXECUTE p1(1);
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) EXECUTE p1(1);
+SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C";
+RESET pg_stat_statements.track;
+DEALLOCATE p1;
+
+-- Functions/procedures
+SET pg_stat_statements.track = 'all';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SET plan_cache_mode TO force_generic_plan;
+SELECT select_one_func(1);
+CALL select_one_proc(1);
+SET plan_cache_mode TO force_custom_plan;
+SELECT select_one_func(1);
+CALL select_one_proc(1);
+SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C";
+
+--
+-- EXPLAIN [ANALYZE] EXECUTE + functions/procedures
+--
+SET pg_stat_statements.track = 'all';
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SET plan_cache_mode TO force_generic_plan;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1);
+EXPLAIN (COSTS OFF) SELECT select_one_func(1);
+CALL select_one_proc(1);
+SET plan_cache_mode TO force_custom_plan;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF, BUFFERS OFF) SELECT select_one_func(1);
+EXPLAIN (COSTS OFF) SELECT select_one_func(1);
+CALL select_one_proc(1);
+SELECT calls, generic_plan_calls, custom_plan_calls, toplevel, query FROM pg_stat_statements
+  ORDER BY query COLLATE "C", toplevel;
+
+RESET pg_stat_statements.track;
+
+--
+-- Cleanup
+--
+DROP FUNCTION select_one_func(int);
+DROP PROCEDURE select_one_proc(int);
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 7baa07dcdbf7f..d753de5836efb 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -554,6 +554,24 @@
       </para></entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>generic_plan_calls</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of times the statement has been executed using a generic plan
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>custom_plan_calls</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of times the statement has been executed using a custom plan
+      </para></entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
        <structfield>stats_since</structfield> <type>timestamp with time zone</type>

From 0decd5e89db9f5edb9b27351082f0d74aae7a9b6 Mon Sep 17 00:00:00 2001
From: Noah Misch
Date: Thu, 31 Jul 2025 06:37:56 -0700
Subject: [PATCH 45/67] Sort dump objects independent of OIDs, for the 7
 holdout object types.

pg_dump sorts objects by their logical names, e.g. (nspname, relname,
tgname), before dependency-driven reordering.  That removes one source
of logically-identical databases differing in their schema-only dumps.
In other words, it helps with schema diffing.  The logical name sort
ignored essential sort keys for constraints, operators, PUBLICATION ...
FOR TABLE, PUBLICATION ... FOR TABLES IN SCHEMA, operator classes, and
operator families.
pg_dump's sort then depended on object OID, yielding spurious schema
diffs.  After this change, OIDs affect dump order only in the event of
catalog corruption.

While pg_dump also wrongly ignored pg_collation.collencoding, CREATE
COLLATION restrictions have been keeping that imperceptible in
practical use.

Use techniques like we use for object types already having full sort
key coverage.  Where the pertinent queries weren't fetching the
ignored sort keys, this adds columns to those queries and stores those
keys in memory for the long term.

The ignorance of sort keys became more problematic when commit
172259afb563d35001410dc6daad78b250924038 added a schema diff test
sensitive to it.  Buildfarm member hippopotamus witnessed that.
However, dump order stability isn't a new goal, and this might avoid
other dump comparison failures.  Hence, back-patch to v13 (all
supported versions).

Reviewed-by: Robert Haas
Discussion: https://postgr.es/m/20250707192654.9e.nmisch@google.com
Backpatch-through: 13
---
 src/bin/pg_dump/common.c                  |  19 ++
 src/bin/pg_dump/pg_dump.c                 |  59 +++++-
 src/bin/pg_dump/pg_dump.h                 |   6 +
 src/bin/pg_dump/pg_dump_sort.c            | 238 ++++++++++++++++++++--
 src/test/regress/expected/publication.out |  21 ++
 src/test/regress/sql/publication.sql      |  22 ++
 6 files changed, 335 insertions(+), 30 deletions(-)

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index aa1589e3331d2..a1976fae607d6 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -17,6 +17,7 @@
 
 #include <ctype.h>
 
+#include "catalog/pg_am_d.h"
 #include "catalog/pg_class_d.h"
 #include "catalog/pg_collation_d.h"
 #include "catalog/pg_extension_d.h"
@@ -944,6 +945,24 @@ findOprByOid(Oid oid)
 	return (OprInfo *) dobj;
 }
 
+/*
+ * findAccessMethodByOid
+ *	  finds the DumpableObject for the access method with the given oid
+ *	  returns NULL if not found
+ */
+AccessMethodInfo *
+findAccessMethodByOid(Oid oid)
+{
+	CatalogId	catId;
+	DumpableObject *dobj;
+
+	catId.tableoid = AccessMethodRelationId;
+	catId.oid = oid;
+	dobj = findObjectByCatalogId(catId);
+	Assert(dobj == NULL || dobj->objType == DO_ACCESS_METHOD);
+	return (AccessMethodInfo *) dobj;
+}
+
 /*
  * findCollationByOid
  *	  finds the DumpableObject for the collation with the given oid
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 1da6bd7d9726c..273117c977c52 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -2207,6 +2207,13 @@ selectDumpableProcLang(ProcLangInfo *plang, Archive *fout)
 static void
 selectDumpableAccessMethod(AccessMethodInfo *method, Archive *fout)
 {
+	/* see getAccessMethods() comment about v9.6.
*/ + if (fout->remoteVersion < 90600) + { + method->dobj.dump = DUMP_COMPONENT_NONE; + return; + } + if (checkExtensionMembership(&method->dobj, fout)) return; /* extension membership overrides all else */ @@ -6262,6 +6269,8 @@ getOperators(Archive *fout) int i_oprnamespace; int i_oprowner; int i_oprkind; + int i_oprleft; + int i_oprright; int i_oprcode; /* @@ -6273,6 +6282,8 @@ getOperators(Archive *fout) "oprnamespace, " "oprowner, " "oprkind, " + "oprleft, " + "oprright, " "oprcode::oid AS oprcode " "FROM pg_operator"); @@ -6288,6 +6299,8 @@ getOperators(Archive *fout) i_oprnamespace = PQfnumber(res, "oprnamespace"); i_oprowner = PQfnumber(res, "oprowner"); i_oprkind = PQfnumber(res, "oprkind"); + i_oprleft = PQfnumber(res, "oprleft"); + i_oprright = PQfnumber(res, "oprright"); i_oprcode = PQfnumber(res, "oprcode"); for (i = 0; i < ntups; i++) @@ -6301,6 +6314,8 @@ getOperators(Archive *fout) findNamespace(atooid(PQgetvalue(res, i, i_oprnamespace))); oprinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_oprowner)); oprinfo[i].oprkind = (PQgetvalue(res, i, i_oprkind))[0]; + oprinfo[i].oprleft = atooid(PQgetvalue(res, i, i_oprleft)); + oprinfo[i].oprright = atooid(PQgetvalue(res, i, i_oprright)); oprinfo[i].oprcode = atooid(PQgetvalue(res, i, i_oprcode)); /* Decide whether we want to dump it */ @@ -6329,6 +6344,7 @@ getCollations(Archive *fout) int i_collname; int i_collnamespace; int i_collowner; + int i_collencoding; query = createPQExpBuffer(); @@ -6339,7 +6355,8 @@ getCollations(Archive *fout) appendPQExpBufferStr(query, "SELECT tableoid, oid, collname, " "collnamespace, " - "collowner " + "collowner, " + "collencoding " "FROM pg_collation"); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); @@ -6353,6 +6370,7 @@ getCollations(Archive *fout) i_collname = PQfnumber(res, "collname"); i_collnamespace = PQfnumber(res, "collnamespace"); i_collowner = PQfnumber(res, "collowner"); + i_collencoding = PQfnumber(res, "collencoding"); for (i = 0; i < ntups; i++) { @@ -6364,6 +6382,7 @@ getCollations(Archive *fout) collinfo[i].dobj.namespace = findNamespace(atooid(PQgetvalue(res, i, i_collnamespace))); collinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_collowner)); + collinfo[i].collencoding = atoi(PQgetvalue(res, i, i_collencoding)); /* Decide whether we want to dump it */ selectDumpableObject(&(collinfo[i].dobj), fout); @@ -6454,16 +6473,28 @@ getAccessMethods(Archive *fout) int i_amhandler; int i_amtype; - /* Before 9.6, there are no user-defined access methods */ - if (fout->remoteVersion < 90600) - return; - query = createPQExpBuffer(); - /* Select all access methods from pg_am table */ - appendPQExpBufferStr(query, "SELECT tableoid, oid, amname, amtype, " - "amhandler::pg_catalog.regproc AS amhandler " - "FROM pg_am"); + /* + * Select all access methods from pg_am table. v9.6 introduced CREATE + * ACCESS METHOD, so earlier versions usually have only built-in access + * methods. v9.6 also changed the access method API, replacing dozens of + * pg_am columns with amhandler. Even if a user created an access method + * by "INSERT INTO pg_am", we have no way to translate pre-v9.6 pg_am + * columns to a v9.6+ CREATE ACCESS METHOD. Hence, before v9.6, read + * pg_am just to facilitate findAccessMethodByOid() providing the + * OID-to-name mapping. 
+ */ + appendPQExpBufferStr(query, "SELECT tableoid, oid, amname, "); + if (fout->remoteVersion >= 90600) + appendPQExpBufferStr(query, + "amtype, " + "amhandler::pg_catalog.regproc AS amhandler "); + else + appendPQExpBufferStr(query, + "'i'::pg_catalog.\"char\" AS amtype, " + "'-'::pg_catalog.regproc AS amhandler "); + appendPQExpBufferStr(query, "FROM pg_am"); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); @@ -6512,6 +6543,7 @@ getOpclasses(Archive *fout) OpclassInfo *opcinfo; int i_tableoid; int i_oid; + int i_opcmethod; int i_opcname; int i_opcnamespace; int i_opcowner; @@ -6521,7 +6553,7 @@ getOpclasses(Archive *fout) * system-defined opclasses at dump-out time. */ - appendPQExpBufferStr(query, "SELECT tableoid, oid, opcname, " + appendPQExpBufferStr(query, "SELECT tableoid, oid, opcmethod, opcname, " "opcnamespace, " "opcowner " "FROM pg_opclass"); @@ -6534,6 +6566,7 @@ getOpclasses(Archive *fout) i_tableoid = PQfnumber(res, "tableoid"); i_oid = PQfnumber(res, "oid"); + i_opcmethod = PQfnumber(res, "opcmethod"); i_opcname = PQfnumber(res, "opcname"); i_opcnamespace = PQfnumber(res, "opcnamespace"); i_opcowner = PQfnumber(res, "opcowner"); @@ -6547,6 +6580,7 @@ getOpclasses(Archive *fout) opcinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opcname)); opcinfo[i].dobj.namespace = findNamespace(atooid(PQgetvalue(res, i, i_opcnamespace))); + opcinfo[i].opcmethod = atooid(PQgetvalue(res, i, i_opcmethod)); opcinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_opcowner)); /* Decide whether we want to dump it */ @@ -6572,6 +6606,7 @@ getOpfamilies(Archive *fout) OpfamilyInfo *opfinfo; int i_tableoid; int i_oid; + int i_opfmethod; int i_opfname; int i_opfnamespace; int i_opfowner; @@ -6583,7 +6618,7 @@ getOpfamilies(Archive *fout) * system-defined opfamilies at dump-out time. 
*/ - appendPQExpBufferStr(query, "SELECT tableoid, oid, opfname, " + appendPQExpBufferStr(query, "SELECT tableoid, oid, opfmethod, opfname, " "opfnamespace, " "opfowner " "FROM pg_opfamily"); @@ -6597,6 +6632,7 @@ getOpfamilies(Archive *fout) i_tableoid = PQfnumber(res, "tableoid"); i_oid = PQfnumber(res, "oid"); i_opfname = PQfnumber(res, "opfname"); + i_opfmethod = PQfnumber(res, "opfmethod"); i_opfnamespace = PQfnumber(res, "opfnamespace"); i_opfowner = PQfnumber(res, "opfowner"); @@ -6609,6 +6645,7 @@ getOpfamilies(Archive *fout) opfinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opfname)); opfinfo[i].dobj.namespace = findNamespace(atooid(PQgetvalue(res, i, i_opfnamespace))); + opfinfo[i].opfmethod = atooid(PQgetvalue(res, i, i_opfmethod)); opfinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_opfowner)); /* Decide whether we want to dump it */ diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 93a4475d51b80..dde85ed156cc8 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -260,6 +260,8 @@ typedef struct _oprInfo DumpableObject dobj; const char *rolname; char oprkind; + Oid oprleft; + Oid oprright; Oid oprcode; } OprInfo; @@ -273,12 +275,14 @@ typedef struct _accessMethodInfo typedef struct _opclassInfo { DumpableObject dobj; + Oid opcmethod; const char *rolname; } OpclassInfo; typedef struct _opfamilyInfo { DumpableObject dobj; + Oid opfmethod; const char *rolname; } OpfamilyInfo; @@ -286,6 +290,7 @@ typedef struct _collInfo { DumpableObject dobj; const char *rolname; + int collencoding; } CollInfo; typedef struct _convInfo @@ -760,6 +765,7 @@ extern TableInfo *findTableByOid(Oid oid); extern TypeInfo *findTypeByOid(Oid oid); extern FuncInfo *findFuncByOid(Oid oid); extern OprInfo *findOprByOid(Oid oid); +extern AccessMethodInfo *findAccessMethodByOid(Oid oid); extern CollInfo *findCollationByOid(Oid oid); extern NamespaceInfo *findNamespaceByOid(Oid oid); extern ExtensionInfo *findExtensionByOid(Oid oid); diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index f99a0797ea7fb..a02da3e9652c1 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -162,6 +162,8 @@ static DumpId postDataBoundId; static int DOTypeNameCompare(const void *p1, const void *p2); +static int pgTypeNameCompare(Oid typid1, Oid typid2); +static int accessMethodNameCompare(Oid am1, Oid am2); static bool TopoSort(DumpableObject **objs, int numObjs, DumpableObject **ordering, @@ -228,12 +230,39 @@ DOTypeNameCompare(const void *p1, const void *p2) else if (obj2->namespace) return 1; - /* Sort by name */ + /* + * Sort by name. With a few exceptions, names here are single catalog + * columns. To get a fuller picture, grep pg_dump.c for "dobj.name = ". + * Names here don't match "Name:" in plain format output, which is a + * _tocEntry.tag. For example, DumpableObject.name of a constraint is + * pg_constraint.conname, but _tocEntry.tag of a constraint is relname and + * conname joined with a space. + */ cmpval = strcmp(obj1->name, obj2->name); if (cmpval != 0) return cmpval; - /* To have a stable sort order, break ties for some object types */ + /* + * Sort by type. This helps types that share a type priority without + * sharing a unique name constraint, e.g. opclass and opfamily. + */ + cmpval = obj1->objType - obj2->objType; + if (cmpval != 0) + return cmpval; + + /* + * To have a stable sort order, break ties for some object types. Most + * catalogs have a natural key, e.g. pg_proc_proname_args_nsp_index. 
Where + * the above "namespace" and "name" comparisons don't cover all natural + * key columns, compare the rest here. + * + * The natural key usually refers to other catalogs by surrogate keys. + * Hence, this translates each of those references to the natural key of + * the referenced catalog. That may descend through multiple levels of + * catalog references. For example, to sort by pg_proc.proargtypes, + * descend to each pg_type and then further to its pg_namespace, for an + * overall sort by (nspname, typname). + */ if (obj1->objType == DO_FUNC || obj1->objType == DO_AGG) { FuncInfo *fobj1 = *(FuncInfo *const *) p1; @@ -246,22 +275,10 @@ DOTypeNameCompare(const void *p1, const void *p2) return cmpval; for (i = 0; i < fobj1->nargs; i++) { - TypeInfo *argtype1 = findTypeByOid(fobj1->argtypes[i]); - TypeInfo *argtype2 = findTypeByOid(fobj2->argtypes[i]); - - if (argtype1 && argtype2) - { - if (argtype1->dobj.namespace && argtype2->dobj.namespace) - { - cmpval = strcmp(argtype1->dobj.namespace->dobj.name, - argtype2->dobj.namespace->dobj.name); - if (cmpval != 0) - return cmpval; - } - cmpval = strcmp(argtype1->dobj.name, argtype2->dobj.name); - if (cmpval != 0) - return cmpval; - } + cmpval = pgTypeNameCompare(fobj1->argtypes[i], + fobj2->argtypes[i]); + if (cmpval != 0) + return cmpval; } } else if (obj1->objType == DO_OPERATOR) @@ -273,6 +290,57 @@ DOTypeNameCompare(const void *p1, const void *p2) cmpval = (oobj2->oprkind - oobj1->oprkind); if (cmpval != 0) return cmpval; + /* Within an oprkind, sort by argument type names */ + cmpval = pgTypeNameCompare(oobj1->oprleft, oobj2->oprleft); + if (cmpval != 0) + return cmpval; + cmpval = pgTypeNameCompare(oobj1->oprright, oobj2->oprright); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_OPCLASS) + { + OpclassInfo *opcobj1 = *(OpclassInfo *const *) p1; + OpclassInfo *opcobj2 = *(OpclassInfo *const *) p2; + + /* Sort by access method name, per pg_opclass_am_name_nsp_index */ + cmpval = accessMethodNameCompare(opcobj1->opcmethod, + opcobj2->opcmethod); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_OPFAMILY) + { + OpfamilyInfo *opfobj1 = *(OpfamilyInfo *const *) p1; + OpfamilyInfo *opfobj2 = *(OpfamilyInfo *const *) p2; + + /* Sort by access method name, per pg_opfamily_am_name_nsp_index */ + cmpval = accessMethodNameCompare(opfobj1->opfmethod, + opfobj2->opfmethod); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_COLLATION) + { + CollInfo *cobj1 = *(CollInfo *const *) p1; + CollInfo *cobj2 = *(CollInfo *const *) p2; + + /* + * Sort by encoding, per pg_collation_name_enc_nsp_index. Technically, + * this is not necessary, because wherever this changes dump order, + * restoring the dump fails anyway. CREATE COLLATION can't create a + * tie for this to break, because it imposes restrictions to make + * (nspname, collname) uniquely identify a collation within a given + * DatabaseEncoding. While pg_import_system_collations() can create a + * tie, pg_dump+restore fails after + * pg_import_system_collations('my_schema') does so. However, there's + * little to gain by ignoring one natural key column on the basis of + * those limitations elsewhere, so respect the full natural key like + * we do for other object types. 
+ */ + cmpval = cobj1->collencoding - cobj2->collencoding; + if (cmpval != 0) + return cmpval; } else if (obj1->objType == DO_ATTRDEF) { @@ -317,11 +385,143 @@ DOTypeNameCompare(const void *p1, const void *p2) if (cmpval != 0) return cmpval; } + else if (obj1->objType == DO_CONSTRAINT) + { + ConstraintInfo *robj1 = *(ConstraintInfo *const *) p1; + ConstraintInfo *robj2 = *(ConstraintInfo *const *) p2; - /* Usually shouldn't get here, but if we do, sort by OID */ + /* + * Sort domain constraints before table constraints, for consistency + * with our decision to sort CREATE DOMAIN before CREATE TABLE. + */ + if (robj1->condomain) + { + if (robj2->condomain) + { + /* Sort by domain name (domain namespace was considered) */ + cmpval = strcmp(robj1->condomain->dobj.name, + robj2->condomain->dobj.name); + if (cmpval != 0) + return cmpval; + } + else + return PRIO_TYPE - PRIO_TABLE; + } + else if (robj2->condomain) + return PRIO_TABLE - PRIO_TYPE; + else + { + /* Sort by table name (table namespace was considered already) */ + cmpval = strcmp(robj1->contable->dobj.name, + robj2->contable->dobj.name); + if (cmpval != 0) + return cmpval; + } + } + else if (obj1->objType == DO_PUBLICATION_REL) + { + PublicationRelInfo *probj1 = *(PublicationRelInfo *const *) p1; + PublicationRelInfo *probj2 = *(PublicationRelInfo *const *) p2; + + /* Sort by publication name, since (namespace, name) match the rel */ + cmpval = strcmp(probj1->publication->dobj.name, + probj2->publication->dobj.name); + if (cmpval != 0) + return cmpval; + } + else if (obj1->objType == DO_PUBLICATION_TABLE_IN_SCHEMA) + { + PublicationSchemaInfo *psobj1 = *(PublicationSchemaInfo *const *) p1; + PublicationSchemaInfo *psobj2 = *(PublicationSchemaInfo *const *) p2; + + /* Sort by publication name, since ->name is just nspname */ + cmpval = strcmp(psobj1->publication->dobj.name, + psobj2->publication->dobj.name); + if (cmpval != 0) + return cmpval; + } + + /* + * Shouldn't get here except after catalog corruption, but if we do, sort + * by OID. This may make logically-identical databases differ in the + * order of objects in dump output. Users will get spurious schema diffs. + * Expect flaky failures of 002_pg_upgrade.pl test 'dump outputs from + * original and restored regression databases match' if the regression + * database contains objects allowing that test to reach here. That's a + * consequence of the test using "pg_restore -j", which doesn't fully + * constrain OID assignment order. + */ + Assert(false); return oidcmp(obj1->catId.oid, obj2->catId.oid); } +/* Compare two OID-identified pg_type values by nspname, then by typname. */ +static int +pgTypeNameCompare(Oid typid1, Oid typid2) +{ + TypeInfo *typobj1; + TypeInfo *typobj2; + int cmpval; + + if (typid1 == typid2) + return 0; + + typobj1 = findTypeByOid(typid1); + typobj2 = findTypeByOid(typid2); + + if (!typobj1 || !typobj2) + { + /* + * getTypes() didn't find some OID. Assume catalog corruption, e.g. + * an oprright value without the corresponding OID in a pg_type row. + * Report as "equal", so the caller uses the next available basis for + * comparison, e.g. the next function argument. + * + * Unary operators have InvalidOid in oprleft (if oprkind='r') or in + * oprright (if oprkind='l'). Caller already sorted by oprkind, + * calling us only for like-kind operators. Hence, "typid1 == typid2" + * took care of InvalidOid. (v14 removed postfix operator support. + * Hence, when dumping from v14+, only oprleft can be InvalidOid.) 
+ */ + Assert(false); + return 0; + } + + if (!typobj1->dobj.namespace || !typobj2->dobj.namespace) + Assert(false); /* catalog corruption */ + else + { + cmpval = strcmp(typobj1->dobj.namespace->dobj.name, + typobj2->dobj.namespace->dobj.name); + if (cmpval != 0) + return cmpval; + } + return strcmp(typobj1->dobj.name, typobj2->dobj.name); +} + +/* Compare two OID-identified pg_am values by amname. */ +static int +accessMethodNameCompare(Oid am1, Oid am2) +{ + AccessMethodInfo *amobj1; + AccessMethodInfo *amobj2; + + if (am1 == am2) + return 0; + + amobj1 = findAccessMethodByOid(am1); + amobj2 = findAccessMethodByOid(am2); + + if (!amobj1 || !amobj2) + { + /* catalog corruption: handle like pgTypeNameCompare() does */ + Assert(false); + return 0; + } + + return strcmp(amobj1->dobj.name, amobj2->dobj.name); +} + /* * Sort the given objects into a safe dump order using dependency diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 3a2eacd793f70..1ec3fa34a2d5a 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -1934,3 +1934,24 @@ RESET client_min_messages; RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; +-- stage objects for pg_dump tests +CREATE SCHEMA pubme CREATE TABLE t0 (c int, d int) CREATE TABLE t1 (c int); +CREATE SCHEMA pubme2 CREATE TABLE t0 (c int, d int); +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION dump_pub_qual_1ct FOR + TABLE ONLY pubme.t0 (c, d) WHERE (c > 0); +CREATE PUBLICATION dump_pub_qual_2ct FOR + TABLE ONLY pubme.t0 (c) WHERE (c > 0), + TABLE ONLY pubme.t1 (c); +CREATE PUBLICATION dump_pub_nsp_1ct FOR + TABLES IN SCHEMA pubme; +CREATE PUBLICATION dump_pub_nsp_2ct FOR + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2; +CREATE PUBLICATION dump_pub_all FOR + TABLE ONLY pubme.t0, + TABLE ONLY pubme.t1 WHERE (c < 0), + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2 + WITH (publish_via_partition_root = true); +RESET client_min_messages; diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index c9e309190dfa6..2585f08318150 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -1229,3 +1229,25 @@ RESET client_min_messages; RESET SESSION AUTHORIZATION; DROP ROLE regress_publication_user, regress_publication_user2; DROP ROLE regress_publication_user_dummy; + +-- stage objects for pg_dump tests +CREATE SCHEMA pubme CREATE TABLE t0 (c int, d int) CREATE TABLE t1 (c int); +CREATE SCHEMA pubme2 CREATE TABLE t0 (c int, d int); +SET client_min_messages = 'ERROR'; +CREATE PUBLICATION dump_pub_qual_1ct FOR + TABLE ONLY pubme.t0 (c, d) WHERE (c > 0); +CREATE PUBLICATION dump_pub_qual_2ct FOR + TABLE ONLY pubme.t0 (c) WHERE (c > 0), + TABLE ONLY pubme.t1 (c); +CREATE PUBLICATION dump_pub_nsp_1ct FOR + TABLES IN SCHEMA pubme; +CREATE PUBLICATION dump_pub_nsp_2ct FOR + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2; +CREATE PUBLICATION dump_pub_all FOR + TABLE ONLY pubme.t0, + TABLE ONLY pubme.t1 WHERE (c < 0), + TABLES IN SCHEMA pubme, + TABLES IN SCHEMA pubme2 + WITH (publish_via_partition_root = true); +RESET client_min_messages; From dbf5a83d4650fc893838a2f92306b3d6439f55ba Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 31 Jul 2025 15:15:44 +0200 Subject: [PATCH 46/67] Schema-qualify unnest() in ALTER DATABASE ... 
RESET
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 9df8727c5067 failed to schema-qualify the unnest() call in the
query used to list the variables in ALTER DATABASE ... RESET.

If there's another unnest() function in the search_path, this could
cause failures or even security issues (when the tab-completion gets
used by privileged accounts).

Report and fix by Dagfinn Ilmari Mannsåker.

Backpatch to 18, same as 9df8727c5067.

Author: Dagfinn Ilmari Mannsåker
Reviewed-by: jian he
Discussion: https://postgr.es/m/87qzyghw2x.fsf%40wibble.ilmari.org
Discussion: https://postgr.es/m/87tt4lumqz.fsf%40wibble.ilmari.org
Backpatch-through: 18
---
 src/bin/psql/tab-complete.in.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index dbc586c5bc370..3c50847f958cd 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -1010,7 +1010,7 @@ static const SchemaQuery Query_for_trigger_of_table = {
 
 #define Query_for_list_of_database_vars \
 "SELECT conf FROM ("\
-" SELECT setdatabase, pg_catalog.split_part(unnest(setconfig),'=',1) conf"\
+" SELECT setdatabase, pg_catalog.split_part(pg_catalog.unnest(setconfig),'=',1) conf"\
 " FROM pg_db_role_setting "\
 " ) s, pg_database d "\
 " WHERE s.setdatabase = d.oid "\

From ca09ef3a6aa69a1250bc83e6d9517f28a2ff181c Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Thu, 31 Jul 2025 15:17:26 +0200
Subject: [PATCH 47/67] Fix tab completion for ALTER ROLE|USER ... RESET
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit c407d5426b87 added tab completion for ALTER ROLE|USER ... RESET,
with the intent to offer only the variables actually set on the role.
But as soon as the user started typing something, it would start to
offer all possible matching variables.

Fix this the same way ALTER DATABASE ... RESET does it, i.e. by
properly considering the prefix.

A second issue causing similar symptoms (offering variables that are
not actually set for a role) was caused by a match to another pattern.
The ALTER DATABASE ... RESET case was already excluded, so do the same
thing for ROLE/USER.

Report and fix by Dagfinn Ilmari Mannsåker.

Backpatch to 18, same as c407d5426b87.
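
For illustration only (not part of this patch), the intended completion
behavior in a psql session; the role name and settings below are
hypothetical:

    ALTER ROLE regress_tab_role SET work_mem = '64MB';
    ALTER ROLE regress_tab_role SET search_path = public;
    -- Typing "ALTER ROLE regress_tab_role RESET wo" and pressing TAB
    -- should complete to work_mem only.  Before the fix, any variable
    -- matching the prefix was offered, whether or not it was actually
    -- set on the role.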
Author: Dagfinn Ilmari Mannsåker Reviewed-by: jian he Discussion: https://postgr.es/m/87qzyghw2x.fsf%40wibble.ilmari.org Discussion: https://postgr.es/m/87tt4lumqz.fsf%40wibble.ilmari.org Backpatch-through: 18 --- src/bin/psql/tab-complete.in.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 3c50847f958cd..1f2ca946fc500 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -1086,9 +1086,12 @@ Keywords_for_list_of_owner_roles, "PUBLIC" " WHERE usename LIKE '%s'" #define Query_for_list_of_user_vars \ -" SELECT pg_catalog.split_part(pg_catalog.unnest(rolconfig),'=',1) "\ -" FROM pg_catalog.pg_roles "\ -" WHERE rolname LIKE '%s'" +"SELECT conf FROM ("\ +" SELECT rolname, pg_catalog.split_part(pg_catalog.unnest(rolconfig),'=',1) conf"\ +" FROM pg_catalog.pg_roles"\ +" ) s"\ +" WHERE s.conf like '%s' "\ +" AND s.rolname LIKE '%s'" #define Query_for_list_of_access_methods \ " SELECT amname "\ @@ -2517,7 +2520,10 @@ match_previous_words(int pattern_id, /* ALTER USER,ROLE RESET */ else if (Matches("ALTER", "USER|ROLE", MatchAny, "RESET")) + { + set_completion_reference(prev2_wd); COMPLETE_WITH_QUERY_PLUS(Query_for_list_of_user_vars, "ALL"); + } /* ALTER USER,ROLE WITH */ else if (Matches("ALTER", "USER|ROLE", MatchAny, "WITH")) @@ -5015,7 +5021,7 @@ match_previous_words(int pattern_id, /* Complete with a variable name */ else if (TailMatches("SET|RESET") && !TailMatches("UPDATE", MatchAny, "SET") && - !TailMatches("ALTER", "DATABASE", MatchAny, "RESET")) + !TailMatches("ALTER", "DATABASE|USER|ROLE", MatchAny, "RESET")) COMPLETE_WITH_QUERY_VERBATIM_PLUS(Query_for_list_of_set_vars, "CONSTRAINTS", "TRANSACTION", From 2ab2d6f970584b7ca60cfdf6569336903aa88db5 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Fri, 1 Aug 2025 07:58:48 +0000 Subject: [PATCH 48/67] Fix a deadlock during ALTER SUBSCRIPTION ... DROP PUBLICATION. A deadlock can occur when the DDL command and the apply worker acquire catalog locks in different orders while dropping replication origins. The issue is rare in PG16 and higher branches because, in most cases, the tablesync worker performs the origin drop in those branches, and its locking sequence does not conflict with DDL operations. This patch ensures consistent lock acquisition to prevent such deadlocks. As per buildfarm. 
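
For illustration only (not part of this patch), a sketch of the kind of
sequence that could deadlock before this fix; the subscription and
publication names are hypothetical:

    -- Session 1 (DDL): drops the replication origins of the removed
    -- tables while holding the locks taken in AlterSubscription_refresh().
    ALTER SUBSCRIPTION regress_sub DROP PUBLICATION regress_pub;
    -- Session 2: the apply worker concurrently drops the origin of a
    -- synced table.  Before the fix, the two paths could acquire the
    -- lock on the subscription object and the lock on
    -- pg_subscription_rel in opposite orders, leaving each session
    -- waiting for the other.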
Reported-by: Alexander Lakhin Author: Ajin Cherian Reviewed-by: Hayato Kuroda Reviewed-by: vignesh C Reviewed-by: Amit Kapila Backpatch-through: 14, where it was introduced Discussion: https://postgr.es/m/bab95e12-6cc5-4ebb-80a8-3e41956aa297@gmail.com --- src/backend/catalog/pg_subscription.c | 21 +++++++++++-- src/backend/replication/logical/tablesync.c | 34 ++++++++++++++++++--- src/include/catalog/pg_subscription_rel.h | 2 +- 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index 63c2992d19f75..244acf52f3602 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -320,7 +320,7 @@ AddSubscriptionRelState(Oid subid, Oid relid, char state, */ void UpdateSubscriptionRelState(Oid subid, Oid relid, char state, - XLogRecPtr sublsn) + XLogRecPtr sublsn, bool already_locked) { Relation rel; HeapTuple tup; @@ -328,9 +328,24 @@ UpdateSubscriptionRelState(Oid subid, Oid relid, char state, Datum values[Natts_pg_subscription_rel]; bool replaces[Natts_pg_subscription_rel]; - LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock); + if (already_locked) + { +#ifdef USE_ASSERT_CHECKING + LOCKTAG tag; - rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); + Assert(CheckRelationOidLockedByMe(SubscriptionRelRelationId, + RowExclusiveLock, true)); + SET_LOCKTAG_OBJECT(tag, InvalidOid, SubscriptionRelationId, subid, 0); + Assert(LockHeldByMe(&tag, AccessShareLock, true)); +#endif + + rel = table_open(SubscriptionRelRelationId, NoLock); + } + else + { + LockSharedObject(SubscriptionRelationId, subid, 0, AccessShareLock); + rel = table_open(SubscriptionRelRelationId, RowExclusiveLock); + } /* Try finding existing mapping. */ tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP, diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 3fea0a0206ed3..d3356bc84ee0c 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -316,7 +316,8 @@ process_syncing_tables_for_sync(XLogRecPtr current_lsn) UpdateSubscriptionRelState(MyLogicalRepWorker->subid, MyLogicalRepWorker->relid, MyLogicalRepWorker->relstate, - MyLogicalRepWorker->relstate_lsn); + MyLogicalRepWorker->relstate_lsn, + false); /* * End streaming so that LogRepWorkerWalRcvConn can be used to drop @@ -425,6 +426,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) ListCell *lc; bool started_tx = false; bool should_exit = false; + Relation rel = NULL; Assert(!IsTransactionState()); @@ -492,7 +494,17 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) * worker to remove the origin tracking as if there is any * error while dropping we won't restart it to drop the * origin. So passing missing_ok = true. + * + * Lock the subscription and origin in the same order as we + * are doing during DDL commands to avoid deadlocks. See + * AlterSubscription_refresh. 
*/
+				LockSharedObject(SubscriptionRelationId, MyLogicalRepWorker->subid,
+								 0, AccessShareLock);
+
+				if (!rel)
+					rel = table_open(SubscriptionRelRelationId, RowExclusiveLock);
+
 				ReplicationOriginNameForLogicalRep(MyLogicalRepWorker->subid,
 												   rstate->relid,
 												   originname,
@@ -504,7 +516,7 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 				 */
 				UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
 										   rstate->relid, rstate->state,
-										   rstate->lsn);
+										   rstate->lsn, true);
 			}
 		}
 		else
@@ -555,7 +567,14 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 					 * This is required to avoid any undetected deadlocks
 					 * due to any existing lock as deadlock detector won't
 					 * be able to detect the waits on the latch.
+					 *
+					 * Also close any tables prior to the commit.
 					 */
+					if (rel)
+					{
+						table_close(rel, NoLock);
+						rel = NULL;
+					}
 					CommitTransactionCommand();
 					pgstat_report_stat(false);
 				}
@@ -623,6 +642,11 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 		}
 	}
 
+	/* Close table if opened */
+	if (rel)
+		table_close(rel, NoLock);
+
 	if (started_tx)
 	{
 		/*
@@ -1414,7 +1438,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
 	UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
 							   MyLogicalRepWorker->relid,
 							   MyLogicalRepWorker->relstate,
-							   MyLogicalRepWorker->relstate_lsn);
+							   MyLogicalRepWorker->relstate_lsn,
+							   false);
 
 	CommitTransactionCommand();
 	pgstat_report_stat(true);
@@ -1547,7 +1572,8 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
 	UpdateSubscriptionRelState(MyLogicalRepWorker->subid,
 							   MyLogicalRepWorker->relid,
 							   SUBREL_STATE_FINISHEDCOPY,
-							   MyLogicalRepWorker->relstate_lsn);
+							   MyLogicalRepWorker->relstate_lsn,
+							   false);
 
 	CommitTransactionCommand();
diff --git a/src/include/catalog/pg_subscription_rel.h b/src/include/catalog/pg_subscription_rel.h
index c91797c869c24..f458447a0e5fb 100644
--- a/src/include/catalog/pg_subscription_rel.h
+++ b/src/include/catalog/pg_subscription_rel.h
@@ -85,7 +85,7 @@ typedef struct SubscriptionRelState
 extern void AddSubscriptionRelState(Oid subid, Oid relid, char state,
 									XLogRecPtr sublsn, bool retain_lock);
 extern void UpdateSubscriptionRelState(Oid subid, Oid relid, char state,
-									   XLogRecPtr sublsn);
+									   XLogRecPtr sublsn, bool already_locked);
 extern char GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn);
 extern void RemoveSubscriptionRel(Oid subid, Oid relid);

From a4801eb691ed18ca483f3e2b5f313d5610a7c839 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Fri, 1 Aug 2025 18:24:19 +0300
Subject: [PATCH 49/67] libpq: Complain about missing BackendKeyData later
 with PQgetCancel()

PostgreSQL always sends the BackendKeyData message at connection
startup, but there are some third party backend implementations out
there that don't support cancellation, and don't send the message [1].
While the protocol docs left it open to interpretation whether that is
valid behavior, libpq in PostgreSQL 17 and below accepted it. It does
not seem like the libpq behavior was intentional though, since it did
so by sending CancelRequest messages with all zeros to such servers
(instead of returning an error or making the cancel a no-op).

In version 18 the behavior was changed to return an error when trying
to create the cancel object with PQgetCancel() or PQcancelCreate().
This was done without any discussion, as part of supporting different
lengths of cancel packets for the new 3.2 version of the protocol.
This commit changes the behavior of PQgetCancel() / PQcancel() once
more to only return an error when the cancel object is actually used
to send a cancellation, instead of when merely creating the object.
The reason to do so is that some clients [2] create a cancel object as
part of their connection creation logic (thus having the cancel object
ready for later when they need it), so if creating the cancel object
returns an error, the whole connection attempt fails. By delaying the
error, such clients will still be able to connect to the third party
backend implementations in question, but when actually trying to
cancel a query, the user will be notified that that is not possible
for the server that they are connected to.

This commit only changes the behavior of the older PQgetCancel() /
PQcancel() functions, not the more modern PQcancelCreate() family of
functions. I.e. PQcancelCreate() returns a failed connection object
(CONNECTION_BAD) if the server didn't send a cancellation key. Unlike
the old PQgetCancel() function, we're not aware of any clients in the
field that use PQcancelCreate() during connection startup in a way
that would prevent connecting to such servers.

[1] AWS RDS Proxy is definitely one of them, and CockroachDB might be
another.

[2] psycopg2 (but not psycopg3).

Author: Jelte Fennema-Nio
Reviewed-by: Jacob Champion
Backpatch-through: 18
Discussion: https://www.postgresql.org/message-id/20250617.101056.1437027795118961504.ishii%40postgresql.org
---
 doc/src/sgml/protocol.sgml       |  5 +++++
 src/interfaces/libpq/fe-cancel.c | 28 +++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index b115884acb346..e56eac8fd0fa0 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -537,6 +537,11 @@
        The frontend should not respond to this message, but should
        continue listening for a ReadyForQuery message.
       </para>
+      <para>
+       The <productname>PostgreSQL</productname> server will always send this
+       message, but some third party backend implementations of the protocol
+       that don't support query cancellation are known not to.
+      </para>
      </listitem>
     </varlistentry>

diff --git a/src/interfaces/libpq/fe-cancel.c b/src/interfaces/libpq/fe-cancel.c
index 65517c5703bca..c872a0267f089 100644
--- a/src/interfaces/libpq/fe-cancel.c
+++ b/src/interfaces/libpq/fe-cancel.c
@@ -379,7 +379,24 @@ PQgetCancel(PGconn *conn)
 
 	/* Check that we have received a cancellation key */
 	if (conn->be_cancel_key_len == 0)
-		return NULL;
+	{
+		/*
+		 * In case there is no cancel key, return an all-zero PGcancel object.
+		 * Actually calling PQcancel on this will fail, but we allow creating
+		 * the PGcancel object anyway. Arguably it would be better to return
+		 * NULL to indicate that cancellation is not possible, but there'd be
+		 * no way for the caller to distinguish "out of memory" from "server
+		 * did not send a cancel key". Also, this is how PQgetCancel() has
+		 * always behaved, and if we changed it, some clients would stop
+		 * working altogether with servers that don't support cancellation.
+		 * (The modern PQcancelCreate() function returns a failed connection
+		 * object instead.)
+		 *
+		 * The returned dummy object has cancel_pkt_len == 0; we check for
+		 * that in PQcancel() to identify it as a dummy.
+ */ + return calloc(1, sizeof(PGcancel)); + } cancel_req_len = offsetof(CancelRequestPacket, cancelAuthCode) + conn->be_cancel_key_len; cancel = malloc(offsetof(PGcancel, cancel_req) + cancel_req_len); @@ -544,6 +561,15 @@ PQcancel(PGcancel *cancel, char *errbuf, int errbufsize) return false; } + if (cancel->cancel_pkt_len == 0) + { + /* This is a dummy PGcancel object, see PQgetCancel */ + strlcpy(errbuf, "PQcancel() -- no cancellation key received", errbufsize); + /* strlcpy probably doesn't change errno, but be paranoid */ + SOCK_ERRNO_SET(save_errno); + return false; + } + /* * We need to open a temporary connection to the postmaster. Do this with * only kernel calls. From 0ed92cf50cc428dad1732a5e604e5450d47acba3 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 1 Aug 2025 10:06:57 -0700 Subject: [PATCH 50/67] pg_dump: reject combination of "only" and "with" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Álvaro Herrera Discussion: https://postgr.es/m/8ce896d1a05040905cc1a3afbc04e94d8e95669a.camel@j-davis.com Backpatch-through: 18 --- src/bin/pg_dump/pg_dump.c | 19 ++++++++++++++----- src/bin/pg_dump/pg_restore.c | 19 ++++++++++++++----- src/bin/pg_dump/t/002_pg_dump.pl | 18 +++++++++++------- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 273117c977c52..b1ac8d7b50994 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -860,6 +860,17 @@ main(int argc, char **argv) if (with_statistics && no_statistics) pg_fatal("options --with-statistics and --no-statistics cannot be used together"); + /* reject conflicting "-only" and "with-" options */ + if (data_only && (with_schema || with_statistics)) + pg_fatal("options %s and %s cannot be used together", + "-a/--data-only", with_schema ? "--with-schema" : "--with-statistics"); + if (schema_only && (with_data || with_statistics)) + pg_fatal("options %s and %s cannot be used together", + "-s/--schema-only", with_data ? "--with-data" : "--with-statistics"); + if (statistics_only && (with_data || with_schema)) + pg_fatal("options %s and %s cannot be used together", + "--statistics-only", with_data ? "--with-data" : "--with-schema"); + if (schema_only && foreign_servers_include_patterns.head != NULL) pg_fatal("options -s/--schema-only and --include-foreign-data cannot be used together"); @@ -873,11 +884,9 @@ main(int argc, char **argv) pg_fatal("option --if-exists requires option -c/--clean"); /* - * Set derivative flags. An "-only" option may be overridden by an - * explicit "with-" option; e.g. "--schema-only --with-statistics" will - * include schema and statistics. Other ambiguous or nonsensical - * combinations, e.g. "--schema-only --no-schema", will have already - * caused an error in one of the checks above. + * Set derivative flags. Ambiguous or nonsensical combinations, e.g. + * "--schema-only --no-schema", will have already caused an error in one + * of the checks above. 
*/
 	dopt.dumpData = ((dopt.dumpData && !schema_only && !statistics_only) ||
 					 (data_only || with_data)) && !no_data;
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index b4e1acdb63fbb..2c727b9f1560b 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -381,6 +381,17 @@ main(int argc, char **argv)
 	if (with_statistics && no_statistics)
 		pg_fatal("options --with-statistics and --no-statistics cannot be used together");
 
+	/* reject conflicting "-only" and "with-" options */
+	if (data_only && (with_schema || with_statistics))
+		pg_fatal("options %s and %s cannot be used together",
+				 "-a/--data-only", with_schema ? "--with-schema" : "--with-statistics");
+	if (schema_only && (with_data || with_statistics))
+		pg_fatal("options %s and %s cannot be used together",
+				 "-s/--schema-only", with_data ? "--with-data" : "--with-statistics");
+	if (statistics_only && (with_data || with_schema))
+		pg_fatal("options %s and %s cannot be used together",
+				 "--statistics-only", with_data ? "--with-data" : "--with-schema");
+
 	if (data_only && opts->dropSchema)
 		pg_fatal("options -c/--clean and -a/--data-only cannot be used together");
 
@@ -399,11 +410,9 @@ main(int argc, char **argv)
 		pg_fatal("cannot specify both --single-transaction and multiple jobs");
 
 	/*
-	 * Set derivative flags.  An "-only" option may be overridden by an
-	 * explicit "with-" option; e.g. "--schema-only --with-statistics" will
-	 * include schema and statistics.  Other ambiguous or nonsensical
-	 * combinations, e.g. "--schema-only --no-schema", will have already
-	 * caused an error in one of the checks above.
+	 * Set derivative flags.  Ambiguous or nonsensical combinations, e.g.
+	 * "--schema-only --no-schema", will have already caused an error in one
+	 * of the checks above.
Author: Yugo Nagata Discussion: https://postgr.es/m/20250802002027.cd35c481f6c6bae7ca2a3e26@sraoss.co.jp --- src/backend/postmaster/autovacuum.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 9474095f271a1..8908603464c5c 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -562,10 +562,10 @@ AutoVacLauncherMain(const void *startup_data, size_t startup_data_len) /* * Create the initial database list. The invariant we want this list to - * keep is that it's ordered by decreasing next_time. As soon as an entry - * is updated to a higher time, it will be moved to the front (which is - * correct because the only operation is to add autovacuum_naptime to the - * entry, and time always increases). + * keep is that it's ordered by decreasing next_worker. As soon as an + * entry is updated to a higher time, it will be moved to the front (which + * is correct because the only operation is to add autovacuum_naptime to + * the entry, and time always increases). */ rebuild_database_list(InvalidOid); From 9eb6068fb64c36889102a09c030d1d9f4d832821 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 1 Aug 2025 16:52:11 -0500 Subject: [PATCH 52/67] Allow resetting unknown custom GUCs with reserved prefixes. Currently, ALTER DATABASE/ROLE/SYSTEM RESET [ALL] with an unknown custom GUC with a prefix reserved by MarkGUCPrefixReserved() errors (unless a superuser runs a RESET ALL variant). This is problematic for cases such as an extension library upgrade that removes a GUC. To fix, simply make sure the relevant code paths explicitly allow it. Note that we require superuser or privileges on the parameter to reset it. This is perhaps a bit more restrictive than is necessary, but it's not clear whether further relaxing the requirements is safe. Oversight in commit 88103567cb. The ALTER SYSTEM fix is dependent on commit 2d870b4aef, which first appeared in v17. Unfortunately, back-patching that commit would introduce ABI breakage, and while that breakage seems unlikely to bother anyone, it doesn't seem worth the risk. Hence, the ALTER SYSTEM part of this commit is omitted on v15 and v16. Reported-by: Mert Alev Reviewed-by: Laurenz Albe Discussion: https://postgr.es/m/18964-ba09dea8c98fccd6%40postgresql.org Backpatch-through: 15 --- contrib/auto_explain/Makefile | 2 ++ contrib/auto_explain/expected/alter_reset.out | 19 ++++++++++++++++ contrib/auto_explain/meson.build | 5 +++++ contrib/auto_explain/sql/alter_reset.sql | 22 +++++++++++++++++++ src/backend/utils/misc/guc.c | 21 +++++++++++++----- 5 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 contrib/auto_explain/expected/alter_reset.out create mode 100644 contrib/auto_explain/sql/alter_reset.sql diff --git a/contrib/auto_explain/Makefile b/contrib/auto_explain/Makefile index efd127d3cae64..94ab28e7c06b9 100644 --- a/contrib/auto_explain/Makefile +++ b/contrib/auto_explain/Makefile @@ -6,6 +6,8 @@ OBJS = \ auto_explain.o PGFILEDESC = "auto_explain - logging facility for execution plans" +REGRESS = alter_reset + TAP_TESTS = 1 ifdef USE_PGXS diff --git a/contrib/auto_explain/expected/alter_reset.out b/contrib/auto_explain/expected/alter_reset.out new file mode 100644 index 0000000000000..ec355189806ae --- /dev/null +++ b/contrib/auto_explain/expected/alter_reset.out @@ -0,0 +1,19 @@ +-- +-- This tests resetting unknown custom GUCs with reserved prefixes. 
There's +-- nothing specific to auto_explain; this is just a convenient place to put +-- this test. +-- +SELECT current_database() AS datname \gset +CREATE ROLE regress_ae_role; +ALTER DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role IN DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER SYSTEM SET auto_explain.bogus = 1; +LOAD 'auto_explain'; +WARNING: invalid configuration parameter name "auto_explain.bogus", removing it +DETAIL: "auto_explain" is now a reserved prefix. +ALTER DATABASE :"datname" RESET auto_explain.bogus; +ALTER ROLE regress_ae_role RESET auto_explain.bogus; +ALTER ROLE regress_ae_role IN DATABASE :"datname" RESET auto_explain.bogus; +ALTER SYSTEM RESET auto_explain.bogus; +DROP ROLE regress_ae_role; diff --git a/contrib/auto_explain/meson.build b/contrib/auto_explain/meson.build index 92dc9df6f7cac..a9b45cc235f12 100644 --- a/contrib/auto_explain/meson.build +++ b/contrib/auto_explain/meson.build @@ -20,6 +20,11 @@ tests += { 'name': 'auto_explain', 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'alter_reset', + ], + }, 'tap': { 'tests': [ 't/001_auto_explain.pl', diff --git a/contrib/auto_explain/sql/alter_reset.sql b/contrib/auto_explain/sql/alter_reset.sql new file mode 100644 index 0000000000000..bf621454ec24a --- /dev/null +++ b/contrib/auto_explain/sql/alter_reset.sql @@ -0,0 +1,22 @@ +-- +-- This tests resetting unknown custom GUCs with reserved prefixes. There's +-- nothing specific to auto_explain; this is just a convenient place to put +-- this test. +-- + +SELECT current_database() AS datname \gset +CREATE ROLE regress_ae_role; + +ALTER DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role SET auto_explain.bogus = 1; +ALTER ROLE regress_ae_role IN DATABASE :"datname" SET auto_explain.bogus = 1; +ALTER SYSTEM SET auto_explain.bogus = 1; + +LOAD 'auto_explain'; + +ALTER DATABASE :"datname" RESET auto_explain.bogus; +ALTER ROLE regress_ae_role RESET auto_explain.bogus; +ALTER ROLE regress_ae_role IN DATABASE :"datname" RESET auto_explain.bogus; +ALTER SYSTEM RESET auto_explain.bogus; + +DROP ROLE regress_ae_role; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 667df448732f2..ce5449f287853 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -4722,8 +4722,13 @@ AlterSystemSetConfigFile(AlterSystemStmt *altersysstmt) * the config file cannot cause postmaster start to fail, so we * don't have to be too tense about possibly installing a bad * value.) + * + * As an exception, we skip this check if this is a RESET command + * for an unknown custom GUC, else there'd be no way for users to + * remove such settings with reserved prefixes. */ - (void) assignable_custom_variable_name(name, false, ERROR); + if (value || !valid_custom_variable_name(name)) + (void) assignable_custom_variable_name(name, false, ERROR); } /* @@ -6711,6 +6716,7 @@ validate_option_array_item(const char *name, const char *value, { struct config_generic *gconf; + bool reset_custom; /* * There are three cases to consider: @@ -6729,16 +6735,21 @@ validate_option_array_item(const char *name, const char *value, * it's assumed to be fully validated.) * * name is not known and can't be created as a placeholder. Throw error, - * unless skipIfNoPermissions is true, in which case return false. + * unless skipIfNoPermissions or reset_custom is true. 
If reset_custom is + true, this is a RESET or RESET ALL operation for an unknown custom GUC + with a reserved prefix, in which case we want to fall through to the + placeholder case described in the preceding paragraph (else there'd be + no way for users to remove them). Otherwise, return false. */ - gconf = find_option(name, true, skipIfNoPermissions, ERROR); - if (!gconf) + reset_custom = (!value && valid_custom_variable_name(name)); + gconf = find_option(name, true, skipIfNoPermissions || reset_custom, ERROR); + if (!gconf && !reset_custom) { /* not known, failed to make a placeholder */ return false; } - if (gconf->flags & GUC_CUSTOM_PLACEHOLDER) + if (!gconf || gconf->flags & GUC_CUSTOM_PLACEHOLDER) { /* * We cannot do any meaningful check on the value, so only permissions From 3b3fa949009393541e552b8ae42cc2b03be25549 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sat, 2 Aug 2025 17:08:45 +0900 Subject: [PATCH 53/67] Fix use-after-free with INSERT ON CONFLICT changes in reorderbuffer.c In ReorderBufferProcessTXN(), which is used to send the data of a transaction to an output plugin, INSERT ON CONFLICT changes (INTERNAL_SPEC_INSERT) are delayed until a confirmation record arrives (INTERNAL_SPEC_CONFIRM), updating the change being processed. 8c58624df462 added an extra step after processing a change to update the progress of the transaction, by calling the callback update_progress_txn() based on the LSN stored in a change after a threshold of CHANGES_THRESHOLD (100) is reached. This logic missed the fact that for an INSERT ON CONFLICT change the data is freed once processed, hence update_progress_txn() could be called pointing to an LSN value that's already been freed. This could result in random crashes, depending on the workload. Per discussion, this issue is fixed by having update_progress_txn() reuse the LSN of the change being processed, captured at the beginning of the loop, meaning that for an INTERNAL_SPEC_CONFIRM change the progress is updated using the LSN of the INTERNAL_SPEC_CONFIRM change, and not the LSN from its INTERNAL_SPEC_INSERT change. This is actually more correct, as we want to update the progress to point to the INTERNAL_SPEC_CONFIRM change. Masahiko Sawada found a nice trick to reproduce the issue: hardcode CHANGES_THRESHOLD at 1 and run test_decoding (test "ddl" being enough) on an instance running Valgrind. The bug has been analyzed by Ethan Mertz, who also originally suggested the solution used in this patch. Issue introduced by 8c58624df462, so backpatch down to v16.
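To make the corrected pattern concrete, here is a simplified standalone sketch (reduced types and helpers, not the actual reorderbuffer code): the LSN is captured at the top of the loop while the change is certainly alive, so the progress callback never reads from possibly-freed memory:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef uint64_t XLogRecPtr;
    typedef struct Change { XLogRecPtr lsn; } Change;

    #define CHANGES_THRESHOLD 2   /* tiny, so the callback fires in the demo */

    static void update_progress(XLogRecPtr lsn)
    {
        printf("progress up to LSN %llu\n", (unsigned long long) lsn);
    }

    /* Processing may free the change, as the INTERNAL_SPEC_INSERT data is
     * freed once its INTERNAL_SPEC_CONFIRM has been handled in the real
     * code. */
    static void process_change(Change *change)
    {
        free(change);
    }

    int main(void)
    {
        int changes_count = 0;

        for (int i = 1; i <= 5; i++)
        {
            Change *change = malloc(sizeof(Change));
            XLogRecPtr prev_lsn;

            change->lsn = (XLogRecPtr) (i * 100);

            /* Capture the LSN while "change" is certainly alive. */
            prev_lsn = change->lsn;

            process_change(change);     /* "change" may be gone now */

            if (++changes_count >= CHANGES_THRESHOLD)
            {
                /* Safe: uses prev_lsn, not change->lsn (a use-after-free). */
                update_progress(prev_lsn);
                changes_count = 0;
            }
        }
        return 0;
    }

The committed one-line fix is exactly this substitution: pass the LSN captured at the start of the loop (prev_lsn) to update_progress_txn() instead of change->lsn.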
Author: Ethan Mertz Co-authored-by: Michael Paquier Reviewed-by: Amit Kapila Reviewed-by: Masahiko Sawada Discussion: https://postgr.es/m/aIsQqDZ7x4LAQ6u1@paquier.xyz Backpatch-through: 16 --- src/backend/replication/logical/reorderbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 5febd154b6bae..34cf05668ae84 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2599,7 +2599,7 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, if (++changes_count >= CHANGES_THRESHOLD) { - rb->update_progress_txn(rb, txn, change->lsn); + rb->update_progress_txn(rb, txn, prev_lsn); changes_count = 0; } } From 37e774458542f2fc34ce3610c5fe39cf7b1d4818 Mon Sep 17 00:00:00 2001 From: Etsuro Fujita Date: Sat, 2 Aug 2025 18:30:00 +0900 Subject: [PATCH 54/67] Doc: clarify the restrictions of AFTER triggers with transition tables. It was not very clear that the triggers are only allowed on plain tables (not foreign tables). Also, rephrase the documentation for better readability. Follow up to commit 9e6104c66. Reported-by: Etsuro Fujita Author: Ashutosh Bapat Reviewed-by: Etsuro Fujita Discussion: https://postgr.es/m/CAPmGK16XBs9ptNr8Lk4f-tJZogf6y-Prz%3D8yhvJbb_4dpsc3mQ%40mail.gmail.com Backpatch-through: 13 --- doc/src/sgml/ref/create_trigger.sgml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/ref/create_trigger.sgml b/doc/src/sgml/ref/create_trigger.sgml index ed6d206ae7143..0d8d463479bc1 100644 --- a/doc/src/sgml/ref/create_trigger.sgml +++ b/doc/src/sgml/ref/create_trigger.sgml @@ -197,9 +197,11 @@ CREATE [ OR REPLACE ] [ CONSTRAINT ] TRIGGER name of the rows inserted, deleted, or modified by the current SQL statement. This feature lets the trigger see a global view of what the statement did, not just one row at a time. This option is only allowed for - an AFTER trigger that is not a constraint trigger; also, if - the trigger is an UPDATE trigger, it must not specify - a column_name list. + an AFTER trigger on a plain table (not a foreign table). + The trigger should not be a constraint trigger. Also, if the trigger is + an UPDATE trigger, it must not specify + a column_name list when using + this option. OLD TABLE may only be specified once, and only for a trigger that can fire on UPDATE or DELETE; it creates a transition relation containing the before-images of all rows From 2106fe25a1c025a75effb7fefcbd47c68d4b9914 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Sat, 2 Aug 2025 19:54:23 +0900 Subject: [PATCH 55/67] Fix typo in foreign_key.sql Introduced by eec0040c4bcd. 
Author: Chao Li Discussion: https://postgr.es/m/CAEoWx2kKMdtWKQiYNuwG2L41YwHA7G3sUsRfD9esPJwZyX1+Eg@mail.gmail.com Backpatch-through: 18 --- src/test/regress/expected/foreign_key.out | 2 +- src/test/regress/sql/foreign_key.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index f9bd252444f53..dc541d61adfa5 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -1750,7 +1750,7 @@ Indexes: Referenced by: TABLE "fk_partitioned_fk" CONSTRAINT "fk_partitioned_fk_a_b_fkey" FOREIGN KEY (a, b) REFERENCES fk_notpartitioned_pk(a, b) --- Check the exsting FK trigger +-- Check the existing FK trigger SELECT conname, tgrelid::regclass as tgrel, regexp_replace(tgname, '[0-9]+', 'N') as tgname, tgtype FROM pg_trigger t JOIN pg_constraint c ON (t.tgconstraint = c.oid) WHERE tgrelid IN (SELECT relid FROM pg_partition_tree('fk_partitioned_fk'::regclass) diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index cfcecb4e911ad..39174ad1eb9a0 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -1296,7 +1296,7 @@ UPDATE fk_notpartitioned_pk SET b = 2504 WHERE a = 2500; -- check psql behavior \d fk_notpartitioned_pk --- Check the exsting FK trigger +-- Check the existing FK trigger SELECT conname, tgrelid::regclass as tgrel, regexp_replace(tgname, '[0-9]+', 'N') as tgname, tgtype FROM pg_trigger t JOIN pg_constraint c ON (t.tgconstraint = c.oid) WHERE tgrelid IN (SELECT relid FROM pg_partition_tree('fk_partitioned_fk'::regclass) From 6a46089e458f2d700dd3b8c3f6fc782de933529a Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sat, 2 Aug 2025 07:51:42 -0700 Subject: [PATCH 56/67] Simplify options in pg_dump and pg_restore. Remove redundant options --with-data and --with-schema, and rename --with-statistics to just --statistics. Reviewed-by: Nathan Bossart Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/f379d0aeefe8effe13302a436bc28f549f09e924.camel@j-davis.com Backpatch-through: 18 --- doc/src/sgml/ref/pg_dump.sgml | 38 +++------- doc/src/sgml/ref/pg_dumpall.sgml | 38 +++------- doc/src/sgml/ref/pg_restore.sgml | 40 +++------- src/bin/pg_dump/pg_dump.c | 43 +++-------- src/bin/pg_dump/pg_dumpall.c | 16 +--- src/bin/pg_dump/pg_restore.c | 35 +++------ src/bin/pg_dump/t/002_pg_dump.pl | 124 +++++++++++++++---------------- src/bin/pg_upgrade/dump.c | 2 +- 8 files changed, 118 insertions(+), 218 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 2ae084b5fa6fc..0bc7609bdf815 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -1354,6 +1354,15 @@ PostgreSQL documentation + + + + + Dump statistics. + + + + @@ -1440,33 +1449,6 @@ PostgreSQL documentation - - - - - Dump data. This is the default. - - - - - - - - - Dump schema (data definitions). This is the default. - - - - - - - - - Dump statistics. - - - - @@ -1682,7 +1664,7 @@ CREATE DATABASE foo WITH TEMPLATE template0; - If is specified, + If is specified, pg_dump will include most optimizer statistics in the resulting dump file. 
However, some statistics may not be included, such as those created explicitly with or diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index f4cbc8288e3ad..364442f00f28e 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -605,6 +605,15 @@ exclude database PATTERN + + + + + Dump statistics. + + + + @@ -640,33 +649,6 @@ exclude database PATTERN - - - - - Dump data. This is the default. - - - - - - - - - Dump schema (data definitions). This is the default. - - - - - - - - - Dump statistics. - - - - @@ -878,7 +860,7 @@ exclude database PATTERN - If is specified, + If is specified, pg_dumpall will include most optimizer statistics in the resulting dump file. However, some statistics may not be included, such as those created explicitly with or diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 2abe05d47e936..261ead1503955 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -815,6 +815,16 @@ PostgreSQL documentation + + + + + Output commands to restore statistics, if the archive contains them. + This is the default. + + + + @@ -873,36 +883,6 @@ PostgreSQL documentation - - - - - Output commands to restore data, if the archive contains them. - This is the default. - - - - - - - - - Output commands to restore schema (data definitions), if the archive - contains them. This is the default. - - - - - - - - - Output commands to restore statistics, if the archive contains them. - This is the default. - - - - diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b1ac8d7b50994..f3a353a61a58e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -449,8 +449,6 @@ main(int argc, char **argv) bool data_only = false; bool schema_only = false; bool statistics_only = false; - bool with_data = false; - bool with_schema = false; bool with_statistics = false; bool no_data = false; bool no_schema = false; @@ -514,6 +512,7 @@ main(int argc, char **argv) {"section", required_argument, NULL, 5}, {"serializable-deferrable", no_argument, &dopt.serializable_deferrable, 1}, {"snapshot", required_argument, NULL, 6}, + {"statistics", no_argument, NULL, 22}, {"statistics-only", no_argument, NULL, 18}, {"strict-names", no_argument, &strict_names, 1}, {"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1}, @@ -528,9 +527,6 @@ main(int argc, char **argv) {"no-toast-compression", no_argument, &dopt.no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1}, {"no-sync", no_argument, NULL, 7}, - {"with-data", no_argument, NULL, 22}, - {"with-schema", no_argument, NULL, 23}, - {"with-statistics", no_argument, NULL, 24}, {"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1}, {"rows-per-insert", required_argument, NULL, 10}, {"include-foreign-data", required_argument, NULL, 11}, @@ -798,14 +794,6 @@ main(int argc, char **argv) break; case 22: - with_data = true; - break; - - case 23: - with_schema = true; - break; - - case 24: with_statistics = true; break; @@ -852,24 +840,17 @@ main(int argc, char **argv) if (statistics_only && no_statistics) pg_fatal("options --statistics-only and --no-statistics cannot be used together"); - /* reject conflicting "with-" and "no-" options */ - if (with_data && no_data) - pg_fatal("options --with-data and --no-data cannot be used together"); - if (with_schema && no_schema) - pg_fatal("options --with-schema and --no-schema cannot be used together"); + /* 
reject conflicting "no-" options */ if (with_statistics && no_statistics) - pg_fatal("options --with-statistics and --no-statistics cannot be used together"); + pg_fatal("options --statistics and --no-statistics cannot be used together"); - /* reject conflicting "-only" and "with-" options */ - if (data_only && (with_schema || with_statistics)) - pg_fatal("options %s and %s cannot be used together", - "-a/--data-only", with_schema ? "--with-schema" : "--with-statistics"); - if (schema_only && (with_data || with_statistics)) + /* reject conflicting "-only" options */ + if (data_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - "-s/--schema-only", with_data ? "--with-data" : "--with-statistics"); - if (statistics_only && (with_data || with_schema)) + "-a/--data-only", "--statistics"); + if (schema_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - "--statistics-only", with_data ? "--with-data" : "--with-schema"); + "-s/--schema-only", "--statistics"); if (schema_only && foreign_servers_include_patterns.head != NULL) pg_fatal("options -s/--schema-only and --include-foreign-data cannot be used together"); @@ -889,9 +870,9 @@ main(int argc, char **argv) * of the checks above. */ dopt.dumpData = ((dopt.dumpData && !schema_only && !statistics_only) || - (data_only || with_data)) && !no_data; + data_only) && !no_data; dopt.dumpSchema = ((dopt.dumpSchema && !data_only && !statistics_only) || - (schema_only || with_schema)) && !no_schema; + schema_only) && !no_schema; dopt.dumpStatistics = ((dopt.dumpStatistics && !schema_only && !data_only) || (statistics_only || with_statistics)) && !no_statistics; @@ -1364,6 +1345,7 @@ help(const char *progname) printf(_(" --sequence-data include sequence data in dump\n")); printf(_(" --serializable-deferrable wait until the dump can run without anomalies\n")); printf(_(" --snapshot=SNAPSHOT use given snapshot for the dump\n")); + printf(_(" --statistics dump the statistics\n")); printf(_(" --statistics-only dump only the statistics, not schema or data\n")); printf(_(" --strict-names require table and/or schema include patterns to\n" " match at least one entity each\n")); @@ -1372,9 +1354,6 @@ help(const char *progname) printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); - printf(_(" --with-data dump the data\n")); - printf(_(" --with-schema dump the schema\n")); - printf(_(" --with-statistics dump the statistics\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=DBNAME database to dump\n")); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 87d10df07c411..27aa1b656989c 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -105,8 +105,6 @@ static int no_subscriptions = 0; static int no_toast_compression = 0; static int no_unlogged_table_data = 0; static int no_role_passwords = 0; -static int with_data = 0; -static int with_schema = 0; static int with_statistics = 0; static int server_version; static int load_via_partition_root = 0; @@ -180,11 +178,9 @@ main(int argc, char *argv[]) {"no-sync", no_argument, NULL, 4}, {"no-toast-compression", no_argument, &no_toast_compression, 1}, {"no-unlogged-table-data", no_argument, &no_unlogged_table_data, 1}, - {"with-data", no_argument, &with_data, 1}, - {"with-schema", no_argument, &with_schema, 1}, - {"with-statistics", no_argument, &with_statistics, 1}, {"on-conflict-do-nothing", no_argument, 
&on_conflict_do_nothing, 1}, {"rows-per-insert", required_argument, NULL, 7}, + {"statistics", no_argument, &with_statistics, 1}, {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 8}, {"sequence-data", no_argument, &sequence_data, 1}, @@ -475,12 +471,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --no-toast-compression"); if (no_unlogged_table_data) appendPQExpBufferStr(pgdumpopts, " --no-unlogged-table-data"); - if (with_data) - appendPQExpBufferStr(pgdumpopts, " --with-data"); - if (with_schema) - appendPQExpBufferStr(pgdumpopts, " --with-schema"); if (with_statistics) - appendPQExpBufferStr(pgdumpopts, " --with-statistics"); + appendPQExpBufferStr(pgdumpopts, " --statistics"); if (on_conflict_do_nothing) appendPQExpBufferStr(pgdumpopts, " --on-conflict-do-nothing"); if (statistics_only) @@ -712,13 +704,11 @@ help(void) printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); printf(_(" --rows-per-insert=NROWS number of rows per INSERT; implies --inserts\n")); printf(_(" --sequence-data include sequence data in dump\n")); + printf(_(" --statistics dump the statistics\n")); printf(_(" --statistics-only dump only the statistics, not schema or data\n")); printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); - printf(_(" --with-data dump the data\n")); - printf(_(" --with-schema dump the schema\n")); - printf(_(" --with-statistics dump the statistics\n")); printf(_("\nConnection options:\n")); printf(_(" -d, --dbname=CONNSTR connect using connection string\n")); diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index 2c727b9f1560b..6c129278bc52b 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -82,8 +82,6 @@ main(int argc, char **argv) static int no_subscriptions = 0; static int strict_names = 0; static int statistics_only = 0; - static int with_data = 0; - static int with_schema = 0; static int with_statistics = 0; struct option cmdopts[] = { @@ -139,9 +137,7 @@ main(int argc, char **argv) {"no-security-labels", no_argument, &no_security_labels, 1}, {"no-subscriptions", no_argument, &no_subscriptions, 1}, {"no-statistics", no_argument, &no_statistics, 1}, - {"with-data", no_argument, &with_data, 1}, - {"with-schema", no_argument, &with_schema, 1}, - {"with-statistics", no_argument, &with_statistics, 1}, + {"statistics", no_argument, &with_statistics, 1}, {"statistics-only", no_argument, &statistics_only, 1}, {"filter", required_argument, NULL, 4}, @@ -373,24 +369,17 @@ main(int argc, char **argv) if (statistics_only && no_statistics) pg_fatal("options --statistics-only and --no-statistics cannot be used together"); - /* reject conflicting "with-" and "no-" options */ - if (with_data && no_data) - pg_fatal("options --with-data and --no-data cannot be used together"); - if (with_schema && no_schema) - pg_fatal("options --with-schema and --no-schema cannot be used together"); + /* reject conflicting "no-" options */ if (with_statistics && no_statistics) - pg_fatal("options --with-statistics and --no-statistics cannot be used together"); + pg_fatal("options --statistics and --no-statistics cannot be used together"); - /* reject conflicting "only-" and "with-" options */ - if (data_only && (with_schema || with_statistics)) + /* reject conflicting "only-" options */ + if (data_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - 
"-a/--data-only", with_schema ? "--with-schema" : "--with-statistics"); - if (schema_only && (with_data || with_statistics)) + "-a/--data-only", "--statistics"); + if (schema_only && with_statistics) pg_fatal("options %s and %s cannot be used together", - "-s/--schema-only", with_data ? "--with-data" : "--with-statistics"); - if (statistics_only && (with_data || with_schema)) - pg_fatal("options %s and %s cannot be used together", - "--statistics-only", with_data ? "--with-data" : "--with-schema"); + "-s/--schema-only", "--statistics"); if (data_only && opts->dropSchema) pg_fatal("options -c/--clean and -a/--data-only cannot be used together"); @@ -415,9 +404,9 @@ main(int argc, char **argv) * of the checks above. */ opts->dumpData = ((opts->dumpData && !schema_only && !statistics_only) || - (data_only || with_data)) && !no_data; + data_only) && !no_data; opts->dumpSchema = ((opts->dumpSchema && !data_only && !statistics_only) || - (schema_only || with_schema)) && !no_schema; + schema_only) && !no_schema; opts->dumpStatistics = ((opts->dumpStatistics && !schema_only && !data_only) || (statistics_only || with_statistics)) && !no_statistics; @@ -558,6 +547,7 @@ usage(const char *progname) printf(_(" --no-table-access-method do not restore table access methods\n")); printf(_(" --no-tablespaces do not restore tablespace assignments\n")); printf(_(" --section=SECTION restore named section (pre-data, data, or post-data)\n")); + printf(_(" --statistics restore the statistics\n")); printf(_(" --statistics-only restore only the statistics, not schema or data\n")); printf(_(" --strict-names require table and/or schema include patterns to\n" " match at least one entity each\n")); @@ -565,9 +555,6 @@ usage(const char *progname) printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); - printf(_(" --with-data restore the data\n")); - printf(_(" --with-schema restore the schema\n")); - printf(_(" --with-statistics restore the statistics\n")); printf(_("\nConnection options:\n")); printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index d597842908e6d..a86b38466de14 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -68,7 +68,7 @@ '--no-data', '--sequence-data', '--binary-upgrade', - '--with-statistics', + '--statistics', '--dbname' => 'postgres', # alternative way to specify database ], restore_cmd => [ @@ -76,7 +76,7 @@ '--format' => 'custom', '--verbose', '--file' => "$tempdir/binary_upgrade.sql", - '--with-statistics', + '--statistics', "$tempdir/binary_upgrade.dump", ], }, @@ -90,13 +90,13 @@ '--format' => 'custom', '--compress' => '1', '--file' => "$tempdir/compression_gzip_custom.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_gzip_custom.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_gzip_custom.dump", ], command_like => { @@ -119,7 +119,7 @@ '--format' => 'directory', '--compress' => 'gzip:1', '--file' => "$tempdir/compression_gzip_dir", - '--with-statistics', + '--statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -137,7 +137,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_gzip_dir.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_gzip_dir", ], }, @@ -150,7 +150,7 @@ '--format' => 
'plain', '--compress' => '1', '--file' => "$tempdir/compression_gzip_plain.sql.gz", - '--with-statistics', + '--statistics', 'postgres', ], # Decompress the generated file to run through the tests. @@ -169,13 +169,13 @@ '--format' => 'custom', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_custom.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_lz4_custom.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_lz4_custom.dump", ], command_like => { @@ -198,7 +198,7 @@ '--format' => 'directory', '--compress' => 'lz4:1', '--file' => "$tempdir/compression_lz4_dir", - '--with-statistics', + '--statistics', 'postgres', ], # Verify that data files were compressed @@ -210,7 +210,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_lz4_dir.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_lz4_dir", ], }, @@ -223,7 +223,7 @@ '--format' => 'plain', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_plain.sql.lz4", - '--with-statistics', + '--statistics', 'postgres', ], # Decompress the generated file to run through the tests. @@ -245,13 +245,13 @@ '--format' => 'custom', '--compress' => 'zstd', '--file' => "$tempdir/compression_zstd_custom.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_zstd_custom.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_zstd_custom.dump", ], command_like => { @@ -273,7 +273,7 @@ '--format' => 'directory', '--compress' => 'zstd:1', '--file' => "$tempdir/compression_zstd_dir", - '--with-statistics', + '--statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -294,7 +294,7 @@ 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_zstd_dir.sql", - '--with-statistics', + '--statistics', "$tempdir/compression_zstd_dir", ], }, @@ -308,7 +308,7 @@ '--format' => 'plain', '--compress' => 'zstd:long', '--file' => "$tempdir/compression_zstd_plain.sql.zst", - '--with-statistics', + '--statistics', 'postgres', ], # Decompress the generated file to run through the tests. 
@@ -327,7 +327,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/clean.sql", '--clean', - '--with-statistics', + '--statistics', '--dbname' => 'postgres', # alternative way to specify database ], }, @@ -338,7 +338,7 @@ '--clean', '--if-exists', '--encoding' => 'UTF8', # no-op, just for testing - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -357,7 +357,7 @@ '--create', '--no-reconnect', # no-op, just for testing '--verbose', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -376,7 +376,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults.sql", - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -385,7 +385,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_no_public.sql", - '--with-statistics', + '--statistics', 'regress_pg_dump_test', ], }, @@ -395,7 +395,7 @@ 'pg_dump', '--no-sync', '--clean', '--file' => "$tempdir/defaults_no_public_clean.sql", - '--with-statistics', + '--statistics', 'regress_pg_dump_test', ], }, @@ -404,7 +404,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_public_owner.sql", - '--with-statistics', + '--statistics', 'regress_public_owner', ], }, @@ -419,14 +419,14 @@ 'pg_dump', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.dump", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_custom_format.dump", ], command_like => { @@ -451,14 +451,14 @@ 'pg_dump', '--format' => 'directory', '--file' => "$tempdir/defaults_dir_format", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'directory', '--file' => "$tempdir/defaults_dir_format.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_dir_format", ], command_like => { @@ -484,13 +484,13 @@ '--format' => 'directory', '--jobs' => 2, '--file' => "$tempdir/defaults_parallel", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/defaults_parallel.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_parallel", ], }, @@ -502,14 +502,14 @@ 'pg_dump', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.tar", - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.sql", - '--with-statistics', + '--statistics', "$tempdir/defaults_tar_format.tar", ], }, @@ -518,7 +518,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_dump_test_schema.sql", '--exclude-schema' => 'dump_test', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -527,7 +527,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_test_table.sql", '--exclude-table' => 'dump_test.test_table', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -536,7 +536,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_measurement.sql", '--exclude-table-and-children' => 'dump_test.measurement', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -546,7 +546,7 @@ '--file' => "$tempdir/exclude_measurement_data.sql", '--exclude-table-data-and-children' => 'dump_test.measurement', '--no-unlogged-table-data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -556,7 +556,7 @@ '--file' => "$tempdir/exclude_test_table_data.sql", '--exclude-table-data' => 'dump_test.test_table', '--no-unlogged-table-data', - 
'--with-statistics', + '--statistics', 'postgres', ], }, @@ -575,7 +575,7 @@ '--file' => "$tempdir/pg_dumpall_globals.sql", '--globals-only', '--no-sync', - '--with-statistics', + '--statistics', ], }, pg_dumpall_globals_clean => { @@ -585,14 +585,14 @@ '--globals-only', '--clean', '--no-sync', - '--with-statistics', + '--statistics', ], }, pg_dumpall_dbprivs => { dump_cmd => [ 'pg_dumpall', '--no-sync', '--file' => "$tempdir/pg_dumpall_dbprivs.sql", - '--with-statistics', + '--statistics', ], }, pg_dumpall_exclude => { @@ -602,7 +602,7 @@ '--file' => "$tempdir/pg_dumpall_exclude.sql", '--exclude-database' => '*dump_test*', '--no-sync', - '--with-statistics', + '--statistics', ], }, no_toast_compression => { @@ -610,7 +610,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_toast_compression.sql", '--no-toast-compression', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -619,7 +619,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_large_objects.sql", '--no-large-objects', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -628,7 +628,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_policies.sql", '--no-policies', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -637,7 +637,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_privs.sql", '--no-privileges', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -646,7 +646,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_owner.sql", '--no-owner', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -655,7 +655,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/no_table_access_method.sql", '--no-table-access-method', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -664,7 +664,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/only_dump_test_schema.sql", '--schema' => 'dump_test', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -675,7 +675,7 @@ '--table' => 'dump_test.test_table', '--lock-wait-timeout' => (1000 * $PostgreSQL::Test::Utils::timeout_default), - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -686,7 +686,7 @@ '--table-and-children' => 'dump_test.measurement', '--lock-wait-timeout' => (1000 * $PostgreSQL::Test::Utils::timeout_default), - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -696,7 +696,7 @@ '--file' => "$tempdir/role.sql", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -709,13 +709,13 @@ '--file' => "$tempdir/role_parallel", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', - '--with-statistics', + '--statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/role_parallel.sql", - '--with-statistics', + '--statistics', "$tempdir/role_parallel", ], }, @@ -744,7 +744,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_pre_data.sql", '--section' => 'pre-data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -753,7 +753,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_data.sql", '--section' => 'data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -762,7 +762,7 @@ 'pg_dump', '--no-sync', '--file' => "$tempdir/section_post_data.sql", '--section' => 'post-data', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -773,7 +773,7 @@ '--schema' => 'dump_test', '--large-objects', '--no-large-objects', - '--with-statistics', + '--statistics', 'postgres', ], }, @@ -789,7 +789,7 @@ 'pg_dump', '--no-sync', 
"--file=$tempdir/no_data_no_schema.sql", '--no-data', '--no-schema', 'postgres', - '--with-statistics', + '--statistics', ], }, statistics_only => { @@ -803,7 +803,7 @@ dump_cmd => [ 'pg_dump', '--no-sync', "--file=$tempdir/no_schema.sql", '--no-schema', - '--with-statistics', 'postgres', + '--statistics', 'postgres', ], },); @@ -5206,10 +5206,10 @@ '--port' => $port, '--strict-names', '--schema-only', - '--with-statistics', + '--statistics', ], - qr/\Qpg_dump: error: options -s\/--schema-only and --with-statistics cannot be used together\E/, - 'cannot use --schema-only and --with-statistics together'); + qr/\Qpg_dump: error: options -s\/--schema-only and --statistics cannot be used together\E/, + 'cannot use --schema-only and --statistics together'); command_fails_like( [ diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index 183f08ce1e86f..55f6e7b4d9c3e 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -58,7 +58,7 @@ generate_old_dump(void) (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? "" : "--sequence-data", log_opts.verbose ? "--verbose" : "", - user_opts.do_statistics ? "--with-statistics" : "--no-statistics", + user_opts.do_statistics ? "--statistics" : "--no-statistics", log_opts.dumpdir, sql_file_name, escaped_connstr.data); From 12efa48978c6dba5eca1b95758127181783fb217 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Sun, 3 Aug 2025 10:49:03 +0900 Subject: [PATCH 57/67] Fix assertion failure in pgbench when handling multiple pipeline sync messages. Previously, when running pgbench in pipeline mode with a custom script that triggered retriable errors (e.g., serialization errors), an assertion failure could occur: Assertion failed: (res == ((void*)0)), function discardUntilSync, file pgbench.c, line 3515. The root cause was that pgbench incorrectly assumed only a single pipeline sync message would be received at the end. In reality, multiple pipeline sync messages can be sent and must be handled properly. This commit fixes the issue by updating pgbench to correctly process multiple pipeline sync messages, preventing the assertion failure. Back-patch to v15, where the bug was introduced. Author: Fujii Masao Reviewed-by: Tatsuo Ishii Discussion: https://postgr.es/m/CAHGQGwFAX56Tfx+1ppo431OSWiLLuW72HaGzZ39NkLkop6bMzQ@mail.gmail.com Backpatch-through: 15 --- src/bin/pgbench/pgbench.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 497a936c141f3..125f3c7bbbe5b 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -3495,6 +3495,8 @@ doRetry(CState *st, pg_time_usec_t *now) static int discardUntilSync(CState *st) { + bool received_sync = false; + /* send a sync */ if (!PQpipelineSync(st->con)) { @@ -3509,10 +3511,21 @@ discardUntilSync(CState *st) PGresult *res = PQgetResult(st->con); if (PQresultStatus(res) == PGRES_PIPELINE_SYNC) + received_sync = true; + else if (received_sync) { - PQclear(res); - res = PQgetResult(st->con); + /* + * PGRES_PIPELINE_SYNC must be followed by another + * PGRES_PIPELINE_SYNC or NULL; otherwise, assert failure. + */ Assert(res == NULL); + + /* + * Reset ongoing sync count to 0 since all PGRES_PIPELINE_SYNC + * results have been discarded. + */ + st->num_syncs = 0; + PQclear(res); break; } PQclear(res); From bb049a79d3447e97c0d4fa220600c423c4474bf9 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 18:26:19 -0400 Subject: [PATCH 58/67] Improve our support for Valgrind's leak tracking. 
When determining whether an allocated chunk is still reachable, Valgrind will consider only pointers within what it believes to be allocated chunks. Normally, all of a block obtained from malloc() would be considered "allocated" --- but it turns out that if we use VALGRIND_MEMPOOL_ALLOC to designate sub-section(s) of a malloc'ed block as allocated, all the rest of that malloc'ed block is ignored. This leads to lots of false positives of course. In particular, in any multi-malloc-block context, all but the primary block were reported as leaked. We also had a problem with context "ident" strings, which were reported as leaked unless there was some other pointer to them besides the one in the context header. To fix, we need to use VALGRIND_MEMPOOL_ALLOC to designate a context's management structs (the context struct itself and any per-block headers) as allocated chunks. That forces moving the VALGRIND_CREATE_MEMPOOL/VALGRIND_DESTROY_MEMPOOL calls into the per-context-type code, so that the pool identifier can be made as soon as we've allocated the initial block, but otherwise it's fairly straightforward. Note that in Valgrind's eyes there is no distinction between these allocations and the allocations that the mmgr modules hand out to user code. That's fine for now, but perhaps someday we'll want to do better yet. When reading this patch, it's helpful to start with the comments added at the head of mcxt.c. Author: Andres Freund Co-authored-by: Tom Lane Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us Discussion: https://postgr.es/m/20210317181531.7oggpqevzz6bka3g@alap3.anarazel.de --- src/backend/utils/mmgr/aset.c | 71 +++++++++++++++++++++++++++-- src/backend/utils/mmgr/bump.c | 31 ++++++++++++- src/backend/utils/mmgr/generation.c | 29 ++++++++++++ src/backend/utils/mmgr/mcxt.c | 23 +++++++--- src/backend/utils/mmgr/slab.c | 32 +++++++++++++ src/include/utils/memdebug.h | 1 + 6 files changed, 177 insertions(+), 10 deletions(-) diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index 666ecd8f78d0e..9ef109ca586bd 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -103,6 +103,8 @@ #define ALLOC_BLOCKHDRSZ MAXALIGN(sizeof(AllocBlockData)) #define ALLOC_CHUNKHDRSZ sizeof(MemoryChunk) +#define FIRST_BLOCKHDRSZ (MAXALIGN(sizeof(AllocSetContext)) + \ + ALLOC_BLOCKHDRSZ) typedef struct AllocBlockData *AllocBlock; /* forward reference */ @@ -458,6 +460,21 @@ AllocSetContextCreateInternal(MemoryContext parent, * we'd leak the header/initial block if we ereport in this stretch. */ + /* Create a vpool associated with the context */ + VALGRIND_CREATE_MEMPOOL(set, 0, false); + + /* + * Create a vchunk covering both the AllocSetContext struct and the keeper + * block's header. (Perhaps it would be more sensible for these to be two + * separate vchunks, but doing that seems to tickle bugs in some versions + * of Valgrind.) We must have these vchunks, and also a vchunk for each + * subsequently-added block header, so that Valgrind considers the + * pointers within them while checking for leaked memory. Note that + * Valgrind doesn't distinguish between these vchunks and those created by + * mcxt.c for the user-accessible-data chunks we allocate. 
+ */ + VALGRIND_MEMPOOL_ALLOC(set, set, FIRST_BLOCKHDRSZ); + /* Fill in the initial block's block header */ block = KeeperBlock(set); block->aset = set; @@ -585,6 +602,14 @@ AllocSetReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, block->freeptr - ((char *) block)); #endif + + /* + * We need to free the block header's vchunk explicitly, although + * the user-data vchunks within will go away in the TRIM below. + * Otherwise Valgrind complains about leaked allocations. + */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } block = next; @@ -592,6 +617,14 @@ AllocSetReset(MemoryContext context) Assert(context->mem_allocated == keepersize); + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the AllocSetContext and + * keeper-block header. This gets rid of the vchunks for whatever user + * data is getting discarded by the context reset. + */ + VALGRIND_MEMPOOL_TRIM(set, set, FIRST_BLOCKHDRSZ); + /* Reset block size allocation sequence, too */ set->nextBlockSize = set->initBlockSize; } @@ -648,6 +681,9 @@ AllocSetDelete(MemoryContext context) freelist->first_free = (AllocSetContext *) oldset->header.nextchild; freelist->num_free--; + /* Destroy the context's vpool --- see notes below */ + VALGRIND_DESTROY_MEMPOOL(oldset); + /* All that remains is to free the header/initial block */ free(oldset); } @@ -675,13 +711,24 @@ AllocSetDelete(MemoryContext context) #endif if (!IsKeeperBlock(set, block)) + { + /* As in AllocSetReset, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); free(block); + } block = next; } Assert(context->mem_allocated == keepersize); + /* + * Destroy the vpool. We don't seem to need to explicitly free the + * initial block's header vchunk, nor any user-data vchunks that Valgrind + * still knows about; they'll all go away automatically. + */ + VALGRIND_DESTROY_MEMPOOL(set); + /* Finally, free the context header, including the keeper block */ free(set); } @@ -716,6 +763,9 @@ AllocSetAllocLarge(MemoryContext context, Size size, int flags) if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, ALLOC_BLOCKHDRSZ); + context->mem_allocated += blksize; block->aset = set; @@ -922,6 +972,9 @@ AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags, if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, ALLOC_BLOCKHDRSZ); + context->mem_allocated += blksize; block->aset = set; @@ -1104,6 +1157,10 @@ AllocSetFree(void *pointer) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, block->freeptr - ((char *) block)); #endif + + /* As in AllocSetReset, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } else @@ -1184,6 +1241,7 @@ AllocSetRealloc(void *pointer, Size size, int flags) * realloc() to make the containing block bigger, or smaller, with * minimum space wastage. 
*/ + AllocBlock newblock; Size chksize; Size blksize; Size oldblksize; @@ -1223,14 +1281,21 @@ AllocSetRealloc(void *pointer, Size size, int flags) blksize = chksize + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; oldblksize = block->endptr - ((char *) block); - block = (AllocBlock) realloc(block, blksize); - if (block == NULL) + newblock = (AllocBlock) realloc(block, blksize); + if (newblock == NULL) { /* Disallow access to the chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); return MemoryContextAllocationFailure(&set->header, size, flags); } + /* + * Move the block-header vchunk explicitly. (mcxt.c will take care of + * moving the vchunk for the user data.) + */ + VALGRIND_MEMPOOL_CHANGE(set, block, newblock, ALLOC_BLOCKHDRSZ); + block = newblock; + /* updated separately, not to underflow when (oldblksize > blksize) */ set->header.mem_allocated -= oldblksize; set->header.mem_allocated += blksize; @@ -1294,7 +1359,7 @@ AllocSetRealloc(void *pointer, Size size, int flags) /* Ensure any padding bytes are marked NOACCESS. */ VALGRIND_MAKE_MEM_NOACCESS((char *) pointer + size, chksize - size); - /* Disallow access to the chunk header . */ + /* Disallow access to the chunk header. */ VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); return pointer; diff --git a/src/backend/utils/mmgr/bump.c b/src/backend/utils/mmgr/bump.c index f7a37d1b3e86c..2805d55a2eca9 100644 --- a/src/backend/utils/mmgr/bump.c +++ b/src/backend/utils/mmgr/bump.c @@ -45,7 +45,9 @@ #include "utils/memutils_memorychunk.h" #include "utils/memutils_internal.h" -#define Bump_BLOCKHDRSZ MAXALIGN(sizeof(BumpBlock)) +#define Bump_BLOCKHDRSZ MAXALIGN(sizeof(BumpBlock)) +#define FIRST_BLOCKHDRSZ (MAXALIGN(sizeof(BumpContext)) + \ + Bump_BLOCKHDRSZ) /* No chunk header unless built with MEMORY_CONTEXT_CHECKING */ #ifdef MEMORY_CONTEXT_CHECKING @@ -189,6 +191,12 @@ BumpContextCreate(MemoryContext parent, const char *name, Size minContextSize, * Avoid writing code that can fail between here and MemoryContextCreate; * we'd leak the header and initial block if we ereport in this stretch. */ + + /* See comments about Valgrind interactions in aset.c */ + VALGRIND_CREATE_MEMPOOL(set, 0, false); + /* This vchunk covers the BumpContext and the keeper block header */ + VALGRIND_MEMPOOL_ALLOC(set, set, FIRST_BLOCKHDRSZ); + dlist_init(&set->blocks); /* Fill in the initial block's block header */ @@ -262,6 +270,14 @@ BumpReset(MemoryContext context) BumpBlockFree(set, block); } + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the BumpContext and keeper-block + * header. This gets rid of the vchunks for whatever user data is getting + * discarded by the context reset. 
+ */ + VALGRIND_MEMPOOL_TRIM(set, set, FIRST_BLOCKHDRSZ); + /* Reset block size allocation sequence, too */ set->nextBlockSize = set->initBlockSize; @@ -279,6 +295,10 @@ BumpDelete(MemoryContext context) { /* Reset to release all releasable BumpBlocks */ BumpReset(context); + + /* Destroy the vpool -- see notes in aset.c */ + VALGRIND_DESTROY_MEMPOOL(context); + /* And free the context header and keeper block */ free(context); } @@ -318,6 +338,9 @@ BumpAllocLarge(MemoryContext context, Size size, int flags) if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Bump_BLOCKHDRSZ); + context->mem_allocated += blksize; /* the block is completely full */ @@ -455,6 +478,9 @@ BumpAllocFromNewBlock(MemoryContext context, Size size, int flags, if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Bump_BLOCKHDRSZ); + context->mem_allocated += blksize; /* initialize the new block */ @@ -606,6 +632,9 @@ BumpBlockFree(BumpContext *set, BumpBlock *block) wipe_mem(block, ((char *) block->endptr - (char *) block)); #endif + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c index 18679ad4f1e41..cfafc9bf0829d 100644 --- a/src/backend/utils/mmgr/generation.c +++ b/src/backend/utils/mmgr/generation.c @@ -45,6 +45,8 @@ #define Generation_BLOCKHDRSZ MAXALIGN(sizeof(GenerationBlock)) #define Generation_CHUNKHDRSZ sizeof(MemoryChunk) +#define FIRST_BLOCKHDRSZ (MAXALIGN(sizeof(GenerationContext)) + \ + Generation_BLOCKHDRSZ) #define Generation_CHUNK_FRACTION 8 @@ -221,6 +223,12 @@ GenerationContextCreate(MemoryContext parent, * Avoid writing code that can fail between here and MemoryContextCreate; * we'd leak the header if we ereport in this stretch. */ + + /* See comments about Valgrind interactions in aset.c */ + VALGRIND_CREATE_MEMPOOL(set, 0, false); + /* This vchunk covers the GenerationContext and the keeper block header */ + VALGRIND_MEMPOOL_ALLOC(set, set, FIRST_BLOCKHDRSZ); + dlist_init(&set->blocks); /* Fill in the initial block's block header */ @@ -309,6 +317,14 @@ GenerationReset(MemoryContext context) GenerationBlockFree(set, block); } + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the GenerationContext and + * keeper-block header. This gets rid of the vchunks for whatever user + * data is getting discarded by the context reset. 
+ */ + VALGRIND_MEMPOOL_TRIM(set, set, FIRST_BLOCKHDRSZ); + /* set it so new allocations to make use of the keeper block */ set->block = KeeperBlock(set); @@ -329,6 +345,10 @@ GenerationDelete(MemoryContext context) { /* Reset to release all releasable GenerationBlocks */ GenerationReset(context); + + /* Destroy the vpool -- see notes in aset.c */ + VALGRIND_DESTROY_MEMPOOL(context); + /* And free the context header and keeper block */ free(context); } @@ -365,6 +385,9 @@ GenerationAllocLarge(MemoryContext context, Size size, int flags) if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Generation_BLOCKHDRSZ); + context->mem_allocated += blksize; /* block with a single (used) chunk */ @@ -487,6 +510,9 @@ GenerationAllocFromNewBlock(MemoryContext context, Size size, int flags, if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(set, block, Generation_BLOCKHDRSZ); + context->mem_allocated += blksize; /* initialize the new block */ @@ -677,6 +703,9 @@ GenerationBlockFree(GenerationContext *set, GenerationBlock *block) wipe_mem(block, block->blksize); #endif + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(set, block); + free(block); } diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index ce01dce9861da..073aa6c4fc5b3 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -8,6 +8,23 @@ * context-type-specific operations via the function pointers in a * context's MemoryContextMethods struct. * + * A note about Valgrind support: when USE_VALGRIND is defined, we provide + * support for memory leak tracking at the allocation-unit level. Valgrind + * does leak detection by tracking allocated "chunks", which can be grouped + * into "pools". The "chunk" terminology is overloaded, since we use that + * word for our allocation units, and it's sometimes important to distinguish + * those from the Valgrind objects that describe them. To reduce confusion, + * let's use the terms "vchunk" and "vpool" for the Valgrind objects. + * + * We use a separate vpool for each memory context. The context-type-specific + * code is responsible for creating and deleting the vpools, and also for + * creating vchunks to cover its management data structures such as block + * headers. (There must be a vchunk that includes every pointer we want + * Valgrind to consider for leak-tracking purposes.) This module creates + * and deletes the vchunks that cover the caller-visible allocated chunks. + * However, the context-type-specific code must handle cleaning up those + * vchunks too during memory context reset operations. 
+ * * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -418,8 +435,6 @@ MemoryContextResetOnly(MemoryContext context) context->methods->reset(context); context->isReset = true; - VALGRIND_DESTROY_MEMPOOL(context); - VALGRIND_CREATE_MEMPOOL(context, 0, false); } } @@ -526,8 +541,6 @@ MemoryContextDeleteOnly(MemoryContext context) context->ident = NULL; context->methods->delete_context(context); - - VALGRIND_DESTROY_MEMPOOL(context); } /* @@ -1170,8 +1183,6 @@ MemoryContextCreate(MemoryContext node, node->nextchild = NULL; node->allowInCritSection = false; } - - VALGRIND_CREATE_MEMPOOL(node, 0, false); } /* diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c index d32c0d318fbf4..0e35abcf5a055 100644 --- a/src/backend/utils/mmgr/slab.c +++ b/src/backend/utils/mmgr/slab.c @@ -377,6 +377,11 @@ SlabContextCreate(MemoryContext parent, * we'd leak the header if we ereport in this stretch. */ + /* See comments about Valgrind interactions in aset.c */ + VALGRIND_CREATE_MEMPOOL(slab, 0, false); + /* This vchunk covers the SlabContext only */ + VALGRIND_MEMPOOL_ALLOC(slab, slab, sizeof(SlabContext)); + /* Fill in SlabContext-specific header fields */ slab->chunkSize = (uint32) chunkSize; slab->fullChunkSize = (uint32) fullChunkSize; @@ -451,6 +456,10 @@ SlabReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif + + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(slab, block); + free(block); context->mem_allocated -= slab->blockSize; } @@ -467,11 +476,23 @@ SlabReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif + + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(slab, block); + free(block); context->mem_allocated -= slab->blockSize; } } + /* + * Instruct Valgrind to throw away all the vchunks associated with this + * context, except for the one covering the SlabContext. This gets rid of + * the vchunks for whatever user data is getting discarded by the context + * reset. 
+ */ + VALGRIND_MEMPOOL_TRIM(slab, slab, sizeof(SlabContext)); + slab->curBlocklistIndex = 0; Assert(context->mem_allocated == 0); @@ -486,6 +507,10 @@ SlabDelete(MemoryContext context) { /* Reset to release all the SlabBlocks */ SlabReset(context); + + /* Destroy the vpool -- see notes in aset.c */ + VALGRIND_DESTROY_MEMPOOL(context); + /* And free the context header */ free(context); } @@ -567,6 +592,9 @@ SlabAllocFromNewBlock(MemoryContext context, Size size, int flags) if (unlikely(block == NULL)) return MemoryContextAllocationFailure(context, size, flags); + /* Make a vchunk covering the new block's header */ + VALGRIND_MEMPOOL_ALLOC(slab, block, Slab_BLOCKHDRSZ); + block->slab = slab; context->mem_allocated += slab->blockSize; @@ -795,6 +823,10 @@ SlabFree(void *pointer) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif + + /* As in aset.c, free block-header vchunks explicitly */ + VALGRIND_MEMPOOL_FREE(slab, block); + free(block); slab->header.mem_allocated -= slab->blockSize; } diff --git a/src/include/utils/memdebug.h b/src/include/utils/memdebug.h index 7309271834b9f..80692dcef9382 100644 --- a/src/include/utils/memdebug.h +++ b/src/include/utils/memdebug.h @@ -29,6 +29,7 @@ #define VALGRIND_MEMPOOL_ALLOC(context, addr, size) do {} while (0) #define VALGRIND_MEMPOOL_FREE(context, addr) do {} while (0) #define VALGRIND_MEMPOOL_CHANGE(context, optr, nptr, size) do {} while (0) +#define VALGRIND_MEMPOOL_TRIM(context, addr, size) do {} while (0) #endif From 9e9190154ef204a4e814dcc99f763398f7094667 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 18:31:39 -0400 Subject: [PATCH 59/67] Fix MemoryContextAllocAligned's interaction with Valgrind. Arrange that only the "aligned chunk" part of the allocated space is included in a Valgrind vchunk. This suppresses complaints about that vchunk being possibly lost because PG is retaining only pointers to the aligned chunk. Also make sure that trailing wasted space is marked NOACCESS. As a tiny performance improvement, arrange that MCXT_ALLOC_ZERO zeroes only the returned "aligned chunk", not the wasted padding space. In passing, fix GetLocalBufferStorage to use MemoryContextAllocAligned instead of rolling its own implementation, which was equally broken according to Valgrind. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/storage/buffer/localbuf.c | 9 +++-- src/backend/utils/mmgr/alignedalloc.c | 18 +++++++++ src/backend/utils/mmgr/mcxt.c | 54 ++++++++++++++++----------- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 3da9c41ee1d7a..3c0d20f4659d2 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -932,10 +932,11 @@ GetLocalBufferStorage(void) num_bufs = Min(num_bufs, MaxAllocSize / BLCKSZ); /* Buffers should be I/O aligned. 
*/ - cur_block = (char *) - TYPEALIGN(PG_IO_ALIGN_SIZE, - MemoryContextAlloc(LocalBufferContext, - num_bufs * BLCKSZ + PG_IO_ALIGN_SIZE)); + cur_block = MemoryContextAllocAligned(LocalBufferContext, + num_bufs * BLCKSZ, + PG_IO_ALIGN_SIZE, + 0); + next_buf_in_block = 0; num_bufs_in_block = num_bufs; } diff --git a/src/backend/utils/mmgr/alignedalloc.c b/src/backend/utils/mmgr/alignedalloc.c index 7eea695de62c5..b1be742691497 100644 --- a/src/backend/utils/mmgr/alignedalloc.c +++ b/src/backend/utils/mmgr/alignedalloc.c @@ -45,6 +45,15 @@ AlignedAllocFree(void *pointer) GetMemoryChunkContext(unaligned)->name, chunk); #endif + /* + * Create a dummy vchunk covering the start of the unaligned chunk, but + * not overlapping the aligned chunk. This will be freed while pfree'ing + * the unaligned chunk, keeping Valgrind happy. Then when we return to + * the outer pfree, that will clean up the vchunk for the aligned chunk. + */ + VALGRIND_MEMPOOL_ALLOC(GetMemoryChunkContext(unaligned), unaligned, + (char *) pointer - (char *) unaligned); + /* Recursively pfree the unaligned chunk */ pfree(unaligned); } @@ -123,6 +132,15 @@ AlignedAllocRealloc(void *pointer, Size size, int flags) VALGRIND_MAKE_MEM_DEFINED(pointer, old_size); memcpy(newptr, pointer, Min(size, old_size)); + /* + * Create a dummy vchunk covering the start of the old unaligned chunk, + * but not overlapping the aligned chunk. This will be freed while + * pfree'ing the old unaligned chunk, keeping Valgrind happy. Then when + * we return to repalloc, it will move the vchunk for the aligned chunk. + */ + VALGRIND_MEMPOOL_ALLOC(ctx, unaligned, + (char *) pointer - (char *) unaligned); + pfree(unaligned); return newptr; diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 073aa6c4fc5b3..47fd774c7d280 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -1465,7 +1465,13 @@ MemoryContextAllocAligned(MemoryContext context, void *unaligned; void *aligned; - /* wouldn't make much sense to waste that much space */ + /* + * Restrict alignto to ensure that it can fit into the "value" field of + * the redirection MemoryChunk, and that the distance back to the start of + * the unaligned chunk will fit into the space available for that. This + * isn't a limitation in practice, since it wouldn't make much sense to + * waste that much space. + */ Assert(alignto < (128 * 1024 * 1024)); /* ensure alignto is a power of 2 */ @@ -1502,10 +1508,15 @@ MemoryContextAllocAligned(MemoryContext context, alloc_size += 1; #endif - /* perform the actual allocation */ - unaligned = MemoryContextAllocExtended(context, alloc_size, flags); + /* + * Perform the actual allocation, but do not pass down MCXT_ALLOC_ZERO. + * This ensures that wasted bytes beyond the aligned chunk do not become + * DEFINED. + */ + unaligned = MemoryContextAllocExtended(context, alloc_size, + flags & ~MCXT_ALLOC_ZERO); - /* set the aligned pointer */ + /* compute the aligned pointer */ aligned = (void *) TYPEALIGN(alignto, (char *) unaligned + sizeof(MemoryChunk)); @@ -1533,12 +1544,23 @@ MemoryContextAllocAligned(MemoryContext context, set_sentinel(aligned, size); #endif - /* Mark the bytes before the redirection header as noaccess */ - VALGRIND_MAKE_MEM_NOACCESS(unaligned, - (char *) alignedchunk - (char *) unaligned); + /* + * MemoryContextAllocExtended marked the whole unaligned chunk as a + * vchunk. Undo that, instead making just the aligned chunk be a vchunk. 
+ * This prevents Valgrind from complaining that the vchunk is possibly + * leaked, since only pointers to the aligned chunk will exist. + * + * After these calls, the aligned chunk will be marked UNDEFINED, and all + * the rest of the unaligned chunk (the redirection chunk header, the + * padding bytes before it, and any wasted trailing bytes) will be marked + * NOACCESS, which is what we want. + */ + VALGRIND_MEMPOOL_FREE(context, unaligned); + VALGRIND_MEMPOOL_ALLOC(context, aligned, size); - /* Disallow access to the redirection chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(alignedchunk, sizeof(MemoryChunk)); + /* Now zero (and make DEFINED) just the aligned chunk, if requested */ + if ((flags & MCXT_ALLOC_ZERO) != 0) + MemSetAligned(aligned, 0, size); return aligned; } @@ -1572,16 +1594,12 @@ void pfree(void *pointer) { #ifdef USE_VALGRIND - MemoryContextMethodID method = GetMemoryChunkMethodID(pointer); MemoryContext context = GetMemoryChunkContext(pointer); #endif MCXT_METHOD(pointer, free_p) (pointer); -#ifdef USE_VALGRIND - if (method != MCTX_ALIGNED_REDIRECT_ID) - VALGRIND_MEMPOOL_FREE(context, pointer); -#endif + VALGRIND_MEMPOOL_FREE(context, pointer); } /* @@ -1591,9 +1609,6 @@ pfree(void *pointer) void * repalloc(void *pointer, Size size) { -#ifdef USE_VALGRIND - MemoryContextMethodID method = GetMemoryChunkMethodID(pointer); -#endif #if defined(USE_ASSERT_CHECKING) || defined(USE_VALGRIND) MemoryContext context = GetMemoryChunkContext(pointer); #endif @@ -1616,10 +1631,7 @@ repalloc(void *pointer, Size size) */ ret = MCXT_METHOD(pointer, realloc) (pointer, size, 0); -#ifdef USE_VALGRIND - if (method != MCTX_ALIGNED_REDIRECT_ID) - VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); -#endif + VALGRIND_MEMPOOL_CHANGE(context, pointer, ret, size); return ret; } From e78d1d6d47dc7f04fb59fddfc38bab73ec8f1e82 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:07:53 -0400 Subject: [PATCH 60/67] Fix assorted pretty-trivial memory leaks in the backend. In the current system architecture, none of these are worth obsessing over; most are once-per-process leaks. However, Valgrind complains about all of them, and if we get to using threads rather than processes for backend sessions, it will become more interesting to avoid per-session leaks. * Fix leaks in StartupXLOG() and ShutdownWalRecovery(). * Fix leakage of pq_mq_handle in a parallel worker. While at it, move mq_putmessage's "Assert(pq_mq_handle != NULL)" to someplace where it's not trivially useless. * Fix leak in logicalrep_worker_detach(). * Don't leak the startup-packet buffer in ProcessStartupPacket(). * Fix leak in evtcache.c's DecodeTextArrayToBitmapset(). If the presented array is toasted, this neglected to free the detoasted copy, which was then leaked into EventTriggerCacheContext. * I'm distressed by the amount of code that BuildEventTriggerCache is willing to run while switched into a long-lived cache context. Although the detoasted array is the only leak that Valgrind reports, let's tighten things up while we're here. (DecodeTextArrayToBitmapset is still run in the cache context, so doing this doesn't remove the need for the detoast fix. But it reduces the surface area for other leaks.) * load_domaintype_info() intentionally leaked some intermediate cruft into the long-lived DomainConstraintCache's memory context, reasoning that the amount of leakage will typically not be much so it's not worth doing a copyObject() of the final tree to avoid that. 
But Valgrind knows nothing of engineering tradeoffs and complains anyway. On the whole, the copyObject doesn't cost that much and this is surely not a performance-critical code path, so let's do it the clean way. * MarkGUCPrefixReserved didn't bother to clean up removed placeholder GUCs at all, which shows up as a leak in one regression test. It seems appropriate for it to do as much cleanup as define_custom_variable does when replacing placeholders, so factor that code out into a helper function. define_custom_variable's logic was one brick shy of a load too: it forgot to free the separate allocation for the placeholder's name. Author: Tom Lane Reviewed-by: Andres Freund Reviewed-by: Richard Guo Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/access/transam/xlog.c | 33 ++++++++++++------- src/backend/access/transam/xlogrecovery.c | 1 + src/backend/libpq/pqmq.c | 16 ++++++--- src/backend/replication/logical/launcher.c | 2 ++ src/backend/tcop/backend_startup.c | 33 +++++++++++++------ src/backend/utils/cache/evtcache.c | 16 +++++---- src/backend/utils/cache/typcache.c | 13 ++++---- src/backend/utils/misc/guc.c | 38 ++++++++++++++-------- 8 files changed, 99 insertions(+), 53 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b0891998b243f..5553c20fee8ce 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -703,7 +703,7 @@ static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version); static void WriteControlFile(void); static void ReadControlFile(void); static void UpdateControlFile(void); -static char *str_time(pg_time_t tnow); +static char *str_time(pg_time_t tnow, char *buf, size_t bufsize); static int get_sync_bit(int method); @@ -5371,11 +5371,9 @@ BootStrapXLOG(uint32 data_checksum_version) } static char * -str_time(pg_time_t tnow) +str_time(pg_time_t tnow, char *buf, size_t bufsize) { - char *buf = palloc(128); - - pg_strftime(buf, 128, + pg_strftime(buf, bufsize, "%Y-%m-%d %H:%M:%S %Z", pg_localtime(&tnow, log_timezone)); @@ -5618,6 +5616,7 @@ StartupXLOG(void) XLogRecPtr missingContrecPtr; TransactionId oldestActiveXID; bool promoted = false; + char timebuf[128]; /* * We should have an aux process resource owner to use, and we should not @@ -5646,25 +5645,29 @@ StartupXLOG(void) */ ereport(IsPostmasterEnvironment ? 
LOG : NOTICE, (errmsg("database system was shut down at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; case DB_SHUTDOWNED_IN_RECOVERY: ereport(LOG, (errmsg("database system was shut down in recovery at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; case DB_SHUTDOWNING: ereport(LOG, (errmsg("database system shutdown was interrupted; last known up at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; case DB_IN_CRASH_RECOVERY: ereport(LOG, (errmsg("database system was interrupted while in recovery at %s", - str_time(ControlFile->time)), + str_time(ControlFile->time, + timebuf, sizeof(timebuf))), errhint("This probably means that some data is corrupted and" " you will have to use the last backup for recovery."))); break; @@ -5672,7 +5675,8 @@ StartupXLOG(void) case DB_IN_ARCHIVE_RECOVERY: ereport(LOG, (errmsg("database system was interrupted while in recovery at log time %s", - str_time(ControlFile->checkPointCopy.time)), + str_time(ControlFile->checkPointCopy.time, + timebuf, sizeof(timebuf))), errhint("If this has occurred more than once some data might be corrupted" " and you might need to choose an earlier recovery target."))); break; @@ -5680,7 +5684,8 @@ StartupXLOG(void) case DB_IN_PRODUCTION: ereport(LOG, (errmsg("database system was interrupted; last known up at %s", - str_time(ControlFile->time)))); + str_time(ControlFile->time, + timebuf, sizeof(timebuf))))); break; default: @@ -6325,6 +6330,12 @@ StartupXLOG(void) */ CompleteCommitTsInitialization(); + /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */ + if (endOfRecoveryInfo->lastPage) + pfree(endOfRecoveryInfo->lastPage); + pfree(endOfRecoveryInfo->recoveryStopReason); + pfree(endOfRecoveryInfo); + /* * All done with end-of-recovery actions. 
* diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index e8f3ba00caae7..f23ec8969c27d 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -1626,6 +1626,7 @@ ShutdownWalRecovery(void) close(readFile); readFile = -1; } + pfree(xlogreader->private_data); XLogReaderFree(xlogreader); XLogPrefetcherFree(xlogprefetcher); diff --git a/src/backend/libpq/pqmq.c b/src/backend/libpq/pqmq.c index f1a08bc32ca17..5f39949a36773 100644 --- a/src/backend/libpq/pqmq.c +++ b/src/backend/libpq/pqmq.c @@ -23,7 +23,7 @@ #include "tcop/tcopprot.h" #include "utils/builtins.h" -static shm_mq_handle *pq_mq_handle; +static shm_mq_handle *pq_mq_handle = NULL; static bool pq_mq_busy = false; static pid_t pq_mq_parallel_leader_pid = 0; static ProcNumber pq_mq_parallel_leader_proc_number = INVALID_PROC_NUMBER; @@ -66,7 +66,11 @@ pq_redirect_to_shm_mq(dsm_segment *seg, shm_mq_handle *mqh) static void pq_cleanup_redirect_to_shm_mq(dsm_segment *seg, Datum arg) { - pq_mq_handle = NULL; + if (pq_mq_handle != NULL) + { + pfree(pq_mq_handle); + pq_mq_handle = NULL; + } whereToSendOutput = DestNone; } @@ -131,8 +135,11 @@ mq_putmessage(char msgtype, const char *s, size_t len) if (pq_mq_busy) { if (pq_mq_handle != NULL) + { shm_mq_detach(pq_mq_handle); - pq_mq_handle = NULL; + pfree(pq_mq_handle); + pq_mq_handle = NULL; + } return EOF; } @@ -152,8 +159,6 @@ mq_putmessage(char msgtype, const char *s, size_t len) iov[1].data = s; iov[1].len = len; - Assert(pq_mq_handle != NULL); - for (;;) { /* @@ -161,6 +166,7 @@ mq_putmessage(char msgtype, const char *s, size_t len) * that the shared memory value is updated before we send the parallel * message signal right after this. */ + Assert(pq_mq_handle != NULL); result = shm_mq_sendv(pq_mq_handle, iov, 2, true, true); if (pq_mq_parallel_leader_pid != 0) diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 742d9ba68e900..37377f7eb636c 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -790,6 +790,8 @@ logicalrep_worker_detach(void) } LWLockRelease(LogicalRepWorkerLock); + + list_free(workers); } /* Block concurrent access. */ diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c index ad0af5edc1f21..14d5fc0b1965a 100644 --- a/src/backend/tcop/backend_startup.c +++ b/src/backend/tcop/backend_startup.c @@ -492,7 +492,7 @@ static int ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) { int32 len; - char *buf; + char *buf = NULL; ProtocolVersion proto; MemoryContext oldcontext; @@ -516,7 +516,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) * scanners, which may be less benign, but it's not really our job to * notice those.) 
*/ - return STATUS_ERROR; + goto fail; } if (pq_getbytes(((char *) &len) + 1, 3) == EOF) @@ -526,7 +526,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("incomplete startup packet"))); - return STATUS_ERROR; + goto fail; } len = pg_ntoh32(len); @@ -538,7 +538,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("invalid length of startup packet"))); - return STATUS_ERROR; + goto fail; } /* @@ -554,7 +554,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("incomplete startup packet"))); - return STATUS_ERROR; + goto fail; } pq_endmsgread(); @@ -568,7 +568,7 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) { ProcessCancelRequestPacket(port, buf, len); /* Not really an error, but we don't want to proceed further */ - return STATUS_ERROR; + goto fail; } if (proto == NEGOTIATE_SSL_CODE && !ssl_done) @@ -607,14 +607,16 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode_for_socket_access(), errmsg("failed to send SSL negotiation response: %m"))); - return STATUS_ERROR; /* close the connection */ + goto fail; /* close the connection */ } #ifdef USE_SSL if (SSLok == 'S' && secure_open_server(port) == -1) - return STATUS_ERROR; + goto fail; #endif + pfree(buf); + /* * At this point we should have no data already buffered. If we do, * it was received before we performed the SSL handshake, so it wasn't @@ -661,14 +663,16 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) ereport(COMMERROR, (errcode_for_socket_access(), errmsg("failed to send GSSAPI negotiation response: %m"))); - return STATUS_ERROR; /* close the connection */ + goto fail; /* close the connection */ } #ifdef ENABLE_GSS if (GSSok == 'G' && secure_open_gssapi(port) == -1) - return STATUS_ERROR; + goto fail; #endif + pfree(buf); + /* * At this point we should have no data already buffered. If we do, * it was received before we performed the GSS handshake, so it wasn't @@ -863,7 +867,16 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done) */ MemoryContextSwitchTo(oldcontext); + pfree(buf); + return STATUS_OK; + +fail: + /* be tidy, just to avoid Valgrind complaints */ + if (buf) + pfree(buf); + + return STATUS_ERROR; } /* diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c index ce596bf563856..b9d5a5998be50 100644 --- a/src/backend/utils/cache/evtcache.c +++ b/src/backend/utils/cache/evtcache.c @@ -78,7 +78,6 @@ BuildEventTriggerCache(void) { HASHCTL ctl; HTAB *cache; - MemoryContext oldcontext; Relation rel; Relation irel; SysScanDesc scan; @@ -110,9 +109,6 @@ BuildEventTriggerCache(void) (Datum) 0); } - /* Switch to correct memory context. */ - oldcontext = MemoryContextSwitchTo(EventTriggerCacheContext); - /* Prevent the memory context from being nuked while we're rebuilding. */ EventTriggerCacheState = ETCS_REBUILD_STARTED; @@ -145,6 +141,7 @@ BuildEventTriggerCache(void) bool evttags_isnull; EventTriggerCacheEntry *entry; bool found; + MemoryContext oldcontext; /* Get next tuple. */ tup = systable_getnext_ordered(scan, ForwardScanDirection); @@ -171,6 +168,9 @@ BuildEventTriggerCache(void) else continue; + /* Switch to correct memory context. */ + oldcontext = MemoryContextSwitchTo(EventTriggerCacheContext); + /* Allocate new cache item. 
*/ item = palloc0(sizeof(EventTriggerCacheItem)); item->fnoid = form->evtfoid; @@ -188,6 +188,9 @@ BuildEventTriggerCache(void) entry->triggerlist = lappend(entry->triggerlist, item); else entry->triggerlist = list_make1(item); + + /* Restore previous memory context. */ + MemoryContextSwitchTo(oldcontext); } /* Done with pg_event_trigger scan. */ @@ -195,9 +198,6 @@ BuildEventTriggerCache(void) index_close(irel, AccessShareLock); relation_close(rel, AccessShareLock); - /* Restore previous memory context. */ - MemoryContextSwitchTo(oldcontext); - /* Install new cache. */ EventTriggerCache = cache; @@ -240,6 +240,8 @@ DecodeTextArrayToBitmapset(Datum array) } pfree(elems); + if ((Pointer) arr != DatumGetPointer(array)) + pfree(arr); return bms; } diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index f9aec38a11fb3..6a347698edffe 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -1171,9 +1171,6 @@ load_domaintype_info(TypeCacheEntry *typentry) elog(ERROR, "domain \"%s\" constraint \"%s\" has NULL conbin", NameStr(typTup->typname), NameStr(c->conname)); - /* Convert conbin to C string in caller context */ - constring = TextDatumGetCString(val); - /* Create the DomainConstraintCache object and context if needed */ if (dcc == NULL) { @@ -1189,9 +1186,8 @@ load_domaintype_info(TypeCacheEntry *typentry) dcc->dccRefCount = 0; } - /* Create node trees in DomainConstraintCache's context */ - oldcxt = MemoryContextSwitchTo(dcc->dccContext); - + /* Convert conbin to a node tree, still in caller's context */ + constring = TextDatumGetCString(val); check_expr = (Expr *) stringToNode(constring); /* @@ -1206,10 +1202,13 @@ load_domaintype_info(TypeCacheEntry *typentry) */ check_expr = expression_planner(check_expr); + /* Create only the minimally needed stuff in dccContext */ + oldcxt = MemoryContextSwitchTo(dcc->dccContext); + r = makeNode(DomainConstraintState); r->constrainttype = DOM_CONSTRAINT_CHECK; r->name = pstrdup(NameStr(c->conname)); - r->check_expr = check_expr; + r->check_expr = copyObject(check_expr); r->check_exprstate = NULL; MemoryContextSwitchTo(oldcxt); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index ce5449f287853..e404c345e6eab 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -249,6 +249,7 @@ static void reapply_stacked_values(struct config_generic *variable, const char *curvalue, GucContext curscontext, GucSource cursource, Oid cursrole); +static void free_placeholder(struct config_string *pHolder); static bool validate_option_array_item(const char *name, const char *value, bool skipIfNoPermissions); static void write_auto_conf_file(int fd, const char *filename, ConfigVariable *head); @@ -5023,16 +5024,8 @@ define_custom_variable(struct config_generic *variable) set_config_sourcefile(name, pHolder->gen.sourcefile, pHolder->gen.sourceline); - /* - * Free up as much as we conveniently can of the placeholder structure. - * (This neglects any stack items, so it's possible for some memory to be - * leaked. Since this can only happen once per session per variable, it - * doesn't seem worth spending much code on.) 
- */ - set_string_field(pHolder, pHolder->variable, NULL); - set_string_field(pHolder, &pHolder->reset_val, NULL); - - guc_free(pHolder); + /* Now we can free the no-longer-referenced placeholder variable */ + free_placeholder(pHolder); } /* @@ -5131,6 +5124,25 @@ reapply_stacked_values(struct config_generic *variable, } } +/* + * Free up a no-longer-referenced placeholder GUC variable. + * + * This neglects any stack items, so it's possible for some memory to be + * leaked. Since this can only happen once per session per variable, it + * doesn't seem worth spending much code on. + */ +static void +free_placeholder(struct config_string *pHolder) +{ + /* Placeholders are always STRING type, so free their values */ + Assert(pHolder->gen.vartype == PGC_STRING); + set_string_field(pHolder, pHolder->variable, NULL); + set_string_field(pHolder, &pHolder->reset_val, NULL); + + guc_free(unconstify(char *, pHolder->gen.name)); + guc_free(pHolder); +} + /* * Functions for extensions to call to define their custom GUC variables. */ @@ -5291,9 +5303,7 @@ MarkGUCPrefixReserved(const char *className) /* * Check for existing placeholders. We must actually remove invalid - * placeholders, else future parallel worker startups will fail. (We - * don't bother trying to free associated memory, since this shouldn't - * happen often.) + * placeholders, else future parallel worker startups will fail. */ hash_seq_init(&status, guc_hashtab); while ((hentry = (GUCHashEntry *) hash_seq_search(&status)) != NULL) @@ -5317,6 +5327,8 @@ MarkGUCPrefixReserved(const char *className) NULL); /* Remove it from any lists it's in, too */ RemoveGUCFromLists(var); + /* And free it */ + free_placeholder((struct config_string *) var); } } From db01c90b2f024298b08dca8aed6b43a2347dee0e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:32:12 -0400 Subject: [PATCH 61/67] Silence Valgrind leakage complaints in more-or-less-hackish ways. These changes don't actually fix any leaks. They just make sure that Valgrind will find pointers to data structures that remain allocated at process exit, and thus not falsely complain about leaks. In particular, we are trying to avoid situations where there is no pointer to the beginning of an allocated block (except possibly within the block itself, which Valgrind won't count). * Because dynahash.c never frees hashtable storage except by deleting the whole hashtable context, it doesn't bother to track the individual blocks of elements allocated by element_alloc(). This results in "possibly lost" complaints from Valgrind except when the first element of each block is actively in use. (Otherwise it'll be on a freelist, but very likely only reachable via "interior pointers" within element blocks, which doesn't satisfy Valgrind.) To fix, if we're building with USE_VALGRIND, expend an extra pointer's worth of space in each element block so that we can chain them all together from the HTAB header. Skip this in shared hashtables though: Valgrind doesn't track those, and we'd need additional locking to make it safe to manipulate a shared chain. While here, update a comment obsoleted by 9c911ec06. * Put the dlist_node fields of catctup and catclist structs first. This ensures that the dlist pointers point to the starts of these palloc blocks, and thus that Valgrind won't consider them "possibly lost". 
* The postmaster's PMChild structs and the autovac launcher's avl_dbase structs also have the dlist_node-is-not-first problem, but putting it first still wouldn't silence the warning because we bulk-allocate those structs in an array, so that Valgrind sees a single allocation. Commonly the first array element will be pointed to only from some later element, so that the reference would be an interior pointer even if it pointed to the array start. (This is the same issue as for dynahash elements.) Since these are pretty simple data structures, I don't feel too bad about faking out Valgrind by just keeping a static pointer to the array start. (This is all quite hacky, and it's not hard to imagine usages where we'd need some other idea in order to have reasonable leak tracking of structures that are only accessible via dlist_node lists. But these changes seem to be enough to silence this class of leakage complaints for the moment.) * Free a couple of data structures manually near the end of an autovacuum worker's run when USE_VALGRIND, and ensure that the final vac_update_datfrozenxid() call is done in a non-permanent context. This doesn't have any real effect on the process's total memory consumption, since we're going to exit as soon as that last transaction is done. But it does pacify Valgrind. * Valgrind complains about the postmaster's socket-files and lock-files lists being leaked, which we can silence by just not nulling out the static pointers to them. * Valgrind seems not to consider the global "environ" variable as a valid root pointer; so when we allocate a new environment array, it claims that data is leaked. To fix that, keep our own statically-allocated copy of the pointer, similarly to the previous item. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/libpq/pqcomm.c | 1 - src/backend/postmaster/autovacuum.c | 26 ++++++++++++++- src/backend/postmaster/pmchild.c | 18 +++++++++- src/backend/utils/hash/dynahash.c | 52 +++++++++++++++++++++++++---- src/backend/utils/init/miscinit.c | 1 - src/backend/utils/misc/ps_status.c | 16 +++++++++ src/include/utils/catcache.h | 23 ++++++++----- 7 files changed, 118 insertions(+), 19 deletions(-) diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index e5171467de18d..25f739a6a17d4 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -858,7 +858,6 @@ RemoveSocketFiles(void) (void) unlink(sock_path); } /* Since we're about to exit, no need to reclaim storage */ - sock_paths = NIL; } diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 8908603464c5c..ff96b36d71025 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -310,6 +310,16 @@ static AutoVacuumShmemStruct *AutoVacuumShmem; static dlist_head DatabaseList = DLIST_STATIC_INIT(DatabaseList); static MemoryContext DatabaseListCxt = NULL; +/* + * Dummy pointer to persuade Valgrind that we've not leaked the array of + * avl_dbase structs. Make it global to ensure the compiler doesn't + * optimize it away. 
+ */ +#ifdef USE_VALGRIND +extern avl_dbase *avl_dbase_array; +avl_dbase *avl_dbase_array; +#endif + /* Pointer to my own WorkerInfo, valid on each worker */ static WorkerInfo MyWorkerInfo = NULL; @@ -1020,6 +1030,10 @@ rebuild_database_list(Oid newdb) /* put all the hash elements into an array */ dbary = palloc(nelems * sizeof(avl_dbase)); + /* keep Valgrind quiet */ +#ifdef USE_VALGRIND + avl_dbase_array = dbary; +#endif i = 0; hash_seq_init(&seq, dbhash); @@ -2565,8 +2579,18 @@ do_autovacuum(void) /* * We leak table_toast_map here (among other things), but since we're - * going away soon, it's not a problem. + * going away soon, it's not a problem normally. But when using Valgrind, + * release some stuff to reduce complaints about leaked storage. */ +#ifdef USE_VALGRIND + hash_destroy(table_toast_map); + FreeTupleDesc(pg_class_desc); + if (bstrategy) + pfree(bstrategy); +#endif + + /* Run the rest in xact context, mainly to avoid Valgrind leak warnings */ + MemoryContextSwitchTo(TopTransactionContext); /* * Update pg_database.datfrozenxid, and truncate pg_xact if possible. We diff --git a/src/backend/postmaster/pmchild.c b/src/backend/postmaster/pmchild.c index cde1d23a4ca8b..584bb58c8abaf 100644 --- a/src/backend/postmaster/pmchild.c +++ b/src/backend/postmaster/pmchild.c @@ -59,6 +59,17 @@ NON_EXEC_STATIC int num_pmchild_slots = 0; */ dlist_head ActiveChildList; +/* + * Dummy pointer to persuade Valgrind that we've not leaked the array of + * PMChild structs. Make it global to ensure the compiler doesn't + * optimize it away. + */ +#ifdef USE_VALGRIND +extern PMChild *pmchild_array; +PMChild *pmchild_array; +#endif + + /* * MaxLivePostmasterChildren * @@ -125,8 +136,13 @@ InitPostmasterChildSlots(void) for (int i = 0; i < BACKEND_NUM_TYPES; i++) num_pmchild_slots += pmchild_pools[i].size; - /* Initialize them */ + /* Allocate enough slots, and make sure Valgrind doesn't complain */ slots = palloc(num_pmchild_slots * sizeof(PMChild)); +#ifdef USE_VALGRIND + pmchild_array = slots; +#endif + + /* Initialize them */ slotno = 0; for (int btype = 0; btype < BACKEND_NUM_TYPES; btype++) { diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 42e9be274fc6a..81da03629f0d2 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -22,10 +22,11 @@ * lookup key's hash value as a partition number --- this will work because * of the way calc_bucket() maps hash values to bucket numbers. * - * For hash tables in shared memory, the memory allocator function should - * match malloc's semantics of returning NULL on failure. For hash tables - * in local memory, we typically use palloc() which will throw error on - * failure. The code in this file has to cope with both cases. + * The memory allocator function should match malloc's semantics of returning + * NULL on failure. (This is essential for hash tables in shared memory. + * For hash tables in local memory, we used to use palloc() which will throw + * error on failure; but we no longer do, so it's untested whether this + * module will still cope with that behavior.) 
* * dynahash.c provides support for these types of lookup keys: * @@ -98,6 +99,7 @@ #include "access/xact.h" #include "common/hashfn.h" +#include "lib/ilist.h" #include "port/pg_bitutils.h" #include "storage/shmem.h" #include "storage/spin.h" @@ -236,6 +238,16 @@ struct HTAB Size keysize; /* hash key length in bytes */ long ssize; /* segment size --- must be power of 2 */ int sshift; /* segment shift = log2(ssize) */ + + /* + * In a USE_VALGRIND build, non-shared hashtables keep an slist chain of + * all the element blocks they have allocated. This pacifies Valgrind, + * which would otherwise often claim that the element blocks are "possibly + * lost" for lack of any non-interior pointers to their starts. + */ +#ifdef USE_VALGRIND + slist_head element_blocks; +#endif }; /* @@ -1712,6 +1724,8 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) { HASHHDR *hctl = hashp->hctl; Size elementSize; + Size requestSize; + char *allocedBlock; HASHELEMENT *firstElement; HASHELEMENT *tmpElement; HASHELEMENT *prevElement; @@ -1723,12 +1737,38 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) /* Each element has a HASHELEMENT header plus user data. */ elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize); + requestSize = nelem * elementSize; + + /* Add space for slist_node list link if we need one. */ +#ifdef USE_VALGRIND + if (!hashp->isshared) + requestSize += MAXALIGN(sizeof(slist_node)); +#endif + + /* Allocate the memory. */ CurrentDynaHashCxt = hashp->hcxt; - firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize); + allocedBlock = hashp->alloc(requestSize); - if (!firstElement) + if (!allocedBlock) return false; + /* + * If USE_VALGRIND, each allocated block of elements of a non-shared + * hashtable is chained into a list, so that Valgrind won't think it's + * been leaked. + */ +#ifdef USE_VALGRIND + if (hashp->isshared) + firstElement = (HASHELEMENT *) allocedBlock; + else + { + slist_push_head(&hashp->element_blocks, (slist_node *) allocedBlock); + firstElement = (HASHELEMENT *) (allocedBlock + MAXALIGN(sizeof(slist_node))); + } +#else + firstElement = (HASHELEMENT *) allocedBlock; +#endif + /* prepare to link all the new entries into the freelist */ prevElement = NULL; tmpElement = firstElement; diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 43b4dbccc3de6..65d8cbfaed585 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -1183,7 +1183,6 @@ UnlinkLockFiles(int status, Datum arg) /* Should we complain if the unlink fails? */ } /* Since we're about to exit, no need to reclaim storage */ - lock_files = NIL; /* * Lock file removal should always be the last externally visible action diff --git a/src/backend/utils/misc/ps_status.c b/src/backend/utils/misc/ps_status.c index e08b26e8c14f2..4df25944deb33 100644 --- a/src/backend/utils/misc/ps_status.c +++ b/src/backend/utils/misc/ps_status.c @@ -100,6 +100,17 @@ static void flush_ps_display(void); static int save_argc; static char **save_argv; +/* + * Valgrind seems not to consider the global "environ" variable as a valid + * root pointer; so when we allocate a new environment array, it claims that + * data is leaked. To fix that, keep our own statically-allocated copy of the + * pointer. (Oddly, this doesn't seem to be a problem for "argv".) 
+ */ +#if defined(PS_USE_CLOBBER_ARGV) && defined(USE_VALGRIND) +extern char **ps_status_new_environ; +char **ps_status_new_environ; +#endif + /* * Call this early in startup to save the original argc/argv values. @@ -206,6 +217,11 @@ save_ps_display_args(int argc, char **argv) } new_environ[i] = NULL; environ = new_environ; + + /* See notes about Valgrind above. */ +#ifdef USE_VALGRIND + ps_status_new_environ = new_environ; +#endif } /* diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 277ec33c00bac..00808e23f49b8 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -87,6 +87,14 @@ typedef struct catcache typedef struct catctup { + /* + * Each tuple in a cache is a member of a dlist that stores the elements + * of its hash bucket. We keep each dlist in LRU order to speed repeated + * lookups. Keep the dlist_node field first so that Valgrind understands + * the struct is reachable. + */ + dlist_node cache_elem; /* list member of per-bucket list */ + int ct_magic; /* for identifying CatCTup entries */ #define CT_MAGIC 0x57261502 @@ -98,13 +106,6 @@ typedef struct catctup */ Datum keys[CATCACHE_MAXKEYS]; - /* - * Each tuple in a cache is a member of a dlist that stores the elements - * of its hash bucket. We keep each dlist in LRU order to speed repeated - * lookups. - */ - dlist_node cache_elem; /* list member of per-bucket list */ - /* * A tuple marked "dead" must not be returned by subsequent searches. * However, it won't be physically deleted from the cache until its @@ -158,13 +159,17 @@ typedef struct catctup */ typedef struct catclist { + /* + * Keep the dlist_node field first so that Valgrind understands the struct + * is reachable. + */ + dlist_node cache_elem; /* list member of per-catcache list */ + int cl_magic; /* for identifying CatCList entries */ #define CL_MAGIC 0x52765103 uint32 hash_value; /* hash value for lookup keys */ - dlist_node cache_elem; /* list member of per-catcache list */ - /* * Lookup keys for the entry, with the first nkeys elements being valid. * All by-reference are separately allocated. From 9f18fa9995628fef752d704d874eeed0bab815e5 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:39:03 -0400 Subject: [PATCH 62/67] Reduce leakage during PL/pgSQL function compilation. format_procedure leaks memory, so run it in a short-lived context not the session-lifespan cache context for the PL/pgSQL function. parse_datatype called the core parser in the function's cache context, thus leaking potentially a lot of storage into that context. We were also being a bit careless with the TypeName structures made in that code path and others. Most of the time we don't need to retain the TypeName, so make sure it is made in the short-lived temp context, and copy it only if we do need to retain it. These are far from the only leaks in PL/pgSQL compilation, but they're the biggest as far as I've seen, and further improvement looks like it'd require delicate and bug-prone surgery. 
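The fix follows the usual recipe for containing such leaks: run the
leak-prone call while a short-lived context is current, then switch into
the long-lived context and copy only the final result.  In sketch form,
using the context and field names from the pl_comp.c diff below:

	/* still in plpgsql_compile_tmp_cxt: cruft leaked here is short-lived */
	proc_signature = format_procedure(fcinfo->flinfo->fn_oid);

	/* switch into the session-lifespan per-function context ... */
	plpgsql_compile_tmp_cxt = MemoryContextSwitchTo(func_cxt);

	/* ... and retain only a clean copy of the string */
	function->fn_signature = pstrdup(proc_signature);
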
Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/pl/plpgsql/src/pl_comp.c | 28 ++++++++++++++++++++++------ src/pl/plpgsql/src/pl_gram.y | 8 +++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index ee961425a5b7e..f6976689a6927 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -177,6 +177,7 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, yyscan_t scanner; Datum prosrcdatum; char *proc_source; + char *proc_signature; HeapTuple typeTup; Form_pg_type typeStruct; PLpgSQL_variable *var; @@ -223,6 +224,9 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, plpgsql_check_syntax = forValidator; plpgsql_curr_compile = function; + /* format_procedure leaks memory, so run it in temp context */ + proc_signature = format_procedure(fcinfo->flinfo->fn_oid); + /* * All the permanent output of compilation (e.g. parse tree) is kept in a * per-function memory context, so it can be reclaimed easily. @@ -237,7 +241,7 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, ALLOCSET_DEFAULT_SIZES); plpgsql_compile_tmp_cxt = MemoryContextSwitchTo(func_cxt); - function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); + function->fn_signature = pstrdup(proc_signature); MemoryContextSetIdentifier(func_cxt, function->fn_signature); function->fn_oid = fcinfo->flinfo->fn_oid; function->fn_input_collation = fcinfo->fncollation; @@ -1673,6 +1677,11 @@ plpgsql_parse_wordrowtype(char *ident) { Oid classOid; Oid typOid; + TypeName *typName; + MemoryContext oldCxt; + + /* Avoid memory leaks in long-term function context */ + oldCxt = MemoryContextSwitchTo(plpgsql_compile_tmp_cxt); /* * Look up the relation. Note that because relation rowtypes have the @@ -1695,9 +1704,12 @@ plpgsql_parse_wordrowtype(char *ident) errmsg("relation \"%s\" does not have a composite type", ident))); + typName = makeTypeName(ident); + + MemoryContextSwitchTo(oldCxt); + /* Build and return the row type struct */ - return plpgsql_build_datatype(typOid, -1, InvalidOid, - makeTypeName(ident)); + return plpgsql_build_datatype(typOid, -1, InvalidOid, typName); } /* ---------- @@ -1711,6 +1723,7 @@ plpgsql_parse_cwordrowtype(List *idents) Oid classOid; Oid typOid; RangeVar *relvar; + TypeName *typName; MemoryContext oldCxt; /* @@ -1733,11 +1746,12 @@ plpgsql_parse_cwordrowtype(List *idents) errmsg("relation \"%s\" does not have a composite type", relvar->relname))); + typName = makeTypeNameFromNameList(idents); + MemoryContextSwitchTo(oldCxt); /* Build and return the row type struct */ - return plpgsql_build_datatype(typOid, -1, InvalidOid, - makeTypeNameFromNameList(idents)); + return plpgsql_build_datatype(typOid, -1, InvalidOid, typName); } /* @@ -1952,6 +1966,8 @@ plpgsql_build_recfield(PLpgSQL_rec *rec, const char *fldname) * origtypname is the parsed form of what the user wrote as the type name. * It can be NULL if the type could not be a composite type, or if it was * identified by OID to begin with (e.g., it's a function argument type). + * origtypname is in short-lived storage and must be copied if we choose + * to incorporate it into the function's parse tree. 
*/ PLpgSQL_type * plpgsql_build_datatype(Oid typeOid, int32 typmod, @@ -2070,7 +2086,7 @@ build_datatype(HeapTuple typeTup, int32 typmod, errmsg("type %s is not composite", format_type_be(typ->typoid)))); - typ->origtypname = origtypname; + typ->origtypname = copyObject(origtypname); typ->tcache = typentry; typ->tupdesc_id = typentry->tupDesc_identifier; } diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 7b672ea5179a6..17568d82554d2 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -3853,6 +3853,7 @@ parse_datatype(const char *string, int location, yyscan_t yyscanner) int32 typmod; sql_error_callback_arg cbarg; ErrorContextCallback syntax_errcontext; + MemoryContext oldCxt; cbarg.location = location; cbarg.yyscanner = yyscanner; @@ -3862,9 +3863,14 @@ parse_datatype(const char *string, int location, yyscan_t yyscanner) syntax_errcontext.previous = error_context_stack; error_context_stack = &syntax_errcontext; - /* Let the main parser try to parse it under standard SQL rules */ + /* + * Let the main parser try to parse it under standard SQL rules. The + * parser leaks memory, so run it in temp context. + */ + oldCxt = MemoryContextSwitchTo(plpgsql_compile_tmp_cxt); typeName = typeStringToTypeName(string, NULL); typenameTypeIdAndMod(NULL, typeName, &type_id, &typmod); + MemoryContextSwitchTo(oldCxt); /* Restore former ereport callback */ error_context_stack = syntax_errcontext.previous; From 2c7b4ad24dda86a73d80df063e9a56c3ecb1e4bb Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:43:04 -0400 Subject: [PATCH 63/67] Suppress complaints about leaks in function cache loading. PL/pgSQL and SQL-function parsing leak some stuff into the long-lived function cache context. This isn't really a huge practical problem, since it's not a large amount of data and the cruft will be recovered if we have to re-parse the function. It's not clear that it's worth working any harder than the previous patch did to eliminate these leak complaints, so instead silence them with a suppression rule. This suppression rule also hides the fact that CachedFunction structs are intentionally leaked in some cases because we're unsure if any fn_extra pointers remain. That might be nice to do something about eventually, but it's not clear how. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/tools/valgrind.supp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/tools/valgrind.supp b/src/tools/valgrind.supp index 2ad5b81526d3f..fad20c8f70804 100644 --- a/src/tools/valgrind.supp +++ b/src/tools/valgrind.supp @@ -194,3 +194,24 @@ Memcheck:Addr8 fun:pg_numa_touch_mem_if_required } + + +# Memory-leak suppressions +# Note that a suppression rule will silence complaints about memory blocks +# allocated in matching places, but it won't prevent "indirectly lost" +# complaints about blocks that are only reachable via the suppressed blocks. + +# Suppress complaints about stuff leaked during function cache loading. +# Both the PL/pgSQL and SQL-function parsing processes generate some cruft +# within the function's cache context, which doesn't seem worth the trouble +# to get rid of. Moreover, there are cases where CachedFunction structs +# are intentionally leaked because we're unsure if any fn_extra pointers +# remain. +{ + hide_function_cache_leaks + Memcheck:Leak + match-leak-kinds: definite,possible,indirect + + ... 
+ fun:cached_function_compile +} From 7f6ededa764b287ba593a2bb7fd566df8053213e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:43:53 -0400 Subject: [PATCH 64/67] Suppress complaints about leaks in TS dictionary loading. Like the situation with function cache loading, text search dictionary loading functions tend to leak some cruft into the dictionary's long-lived cache context. To judge by the examples in the core regression tests, not very many bytes are at stake. Moreover, I don't see a way to prevent such leaks without changing the API for TS template initialization functions: right now they do not have to worry about making sure that their results are long-lived. Hence, I think we should install a suppression rule rather than trying to fix this completely. However, I did grab some low-hanging fruit: several places were leaking the result of get_tsearch_config_filename. This seems worth doing mostly because they are inconsistent with other dictionaries that were freeing it already. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/tsearch/dict_ispell.c | 18 ++++++++++++------ src/backend/tsearch/dict_synonym.c | 1 + src/backend/tsearch/dict_thesaurus.c | 7 ++++--- src/backend/utils/cache/ts_cache.c | 4 +++- src/tools/valgrind.supp | 12 ++++++++++++ 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c index 63bd193a78a89..debfbf956cc1f 100644 --- a/src/backend/tsearch/dict_ispell.c +++ b/src/backend/tsearch/dict_ispell.c @@ -47,24 +47,30 @@ dispell_init(PG_FUNCTION_ARGS) if (strcmp(defel->defname, "dictfile") == 0) { + char *filename; + if (dictloaded) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple DictFile parameters"))); - NIImportDictionary(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "dict")); + filename = get_tsearch_config_filename(defGetString(defel), + "dict"); + NIImportDictionary(&(d->obj), filename); + pfree(filename); dictloaded = true; } else if (strcmp(defel->defname, "afffile") == 0) { + char *filename; + if (affloaded) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple AffFile parameters"))); - NIImportAffixes(&(d->obj), - get_tsearch_config_filename(defGetString(defel), - "affix")); + filename = get_tsearch_config_filename(defGetString(defel), + "affix"); + NIImportAffixes(&(d->obj), filename); + pfree(filename); affloaded = true; } else if (strcmp(defel->defname, "stopwords") == 0) diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index 0da5a9d686802..c2773eb01adee 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -199,6 +199,7 @@ dsynonym_init(PG_FUNCTION_ARGS) } tsearch_readline_end(&trst); + pfree(filename); d->len = cur; qsort(d->syn, d->len, sizeof(Syn), compareSyn); diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index 1bebe36a6910e..1e6bbde1ca7d8 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -167,17 +167,17 @@ addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 p static void thesaurusRead(const char *filename, DictThesaurus *d) { + char *real_filename = get_tsearch_config_filename(filename, "ths"); tsearch_readline_state trst; uint32 idsubst = 0; bool useasis = false; char *line; - filename = get_tsearch_config_filename(filename, 
"ths"); - if (!tsearch_readline_begin(&trst, filename)) + if (!tsearch_readline_begin(&trst, real_filename)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("could not open thesaurus file \"%s\": %m", - filename))); + real_filename))); while ((line = tsearch_readline(&trst)) != NULL) { @@ -297,6 +297,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) d->nsubst = idsubst; tsearch_readline_end(&trst); + pfree(real_filename); } static TheLexeme * diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c index 18cccd778fd8c..e8ae53238d07a 100644 --- a/src/backend/utils/cache/ts_cache.c +++ b/src/backend/utils/cache/ts_cache.c @@ -321,7 +321,9 @@ lookup_ts_dictionary_cache(Oid dictId) /* * Init method runs in dictionary's private memory context, and we - * make sure the options are stored there too + * make sure the options are stored there too. This typically + * results in a small amount of memory leakage, but it's not worth + * complicating the API for tmplinit functions to avoid it. */ oldcontext = MemoryContextSwitchTo(entry->dictCtx); diff --git a/src/tools/valgrind.supp b/src/tools/valgrind.supp index fad20c8f70804..3880007dfb3bb 100644 --- a/src/tools/valgrind.supp +++ b/src/tools/valgrind.supp @@ -215,3 +215,15 @@ ... fun:cached_function_compile } + +# Suppress complaints about stuff leaked during TS dictionary loading. +# Not very much is typically lost there, and preventing it would +# require a risky API change for TS tmplinit functions. +{ + hide_ts_dictionary_leaks + Memcheck:Leak + match-leak-kinds: definite,possible,indirect + + ... + fun:lookup_ts_dictionary_cache +} From b102c8c4733cf76ff0635dc440ee8dd11487ed95 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 19:44:10 -0400 Subject: [PATCH 65/67] Silence complaints about leaks in PlanCacheComputeResultDesc. CompleteCachedPlan intentionally doesn't worry about small leaks from PlanCacheComputeResultDesc. However, Valgrind knows nothing of engineering tradeoffs and complains anyway. Silence it by doing things the hard way if USE_VALGRIND. I don't really love this patch, because it makes the handling of plansource->resultDesc different from the handling of query dependencies and search_path just above, which likewise are willing to accept small leaks into the cached plan's context. However, those cases aren't provoking Valgrind complaints. (Perhaps in a CLOBBER_CACHE_ALWAYS build, they would?) For the moment, this makes the src/pl/plpgsql tests leak-free according to Valgrind. Author: Tom Lane Reviewed-by: Andres Freund Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/utils/cache/plancache.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 0c506d320b137..6661d2c6b7391 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -463,8 +463,7 @@ CompleteCachedPlan(CachedPlanSource *plansource, /* * Save the final parameter types (or other parameter specification data) - * into the source_context, as well as our other parameters. Also save - * the result tuple descriptor. + * into the source_context, as well as our other parameters. 
*/ MemoryContextSwitchTo(source_context); @@ -480,9 +479,25 @@ CompleteCachedPlan(CachedPlanSource *plansource, plansource->parserSetupArg = parserSetupArg; plansource->cursor_options = cursor_options; plansource->fixed_result = fixed_result; - plansource->resultDesc = PlanCacheComputeResultDesc(querytree_list); + /* + * Also save the result tuple descriptor. PlanCacheComputeResultDesc may + * leak some cruft; normally we just accept that to save a copy step, but + * in USE_VALGRIND mode be tidy by running it in the caller's context. + */ +#ifdef USE_VALGRIND + MemoryContextSwitchTo(oldcxt); + plansource->resultDesc = PlanCacheComputeResultDesc(querytree_list); + if (plansource->resultDesc) + { + MemoryContextSwitchTo(source_context); + plansource->resultDesc = CreateTupleDescCopy(plansource->resultDesc); + MemoryContextSwitchTo(oldcxt); + } +#else + plansource->resultDesc = PlanCacheComputeResultDesc(querytree_list); MemoryContextSwitchTo(oldcxt); +#endif plansource->is_complete = true; plansource->is_valid = true; From 4fbfdde58e4cd091f88737dffa241b08c23d8829 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 2 Aug 2025 21:26:21 -0400 Subject: [PATCH 66/67] Avoid leakage of zero-length arrays in partition_bounds_copy(). If ndatums is zero, the code would allocate zero-length boundKinds and boundDatums chunks, which would have nothing pointing to them, leading to Valgrind complaints. Rearrange the code to avoid the useless pallocs, and also to not bother computing byval/typlen when they aren't used. I'm unsure why I didn't see this in my Valgrind testing back in May. This code hasn't changed since then, but maybe we added a regression test that reaches this edge case. Or possibly I just failed to notice the reports, which do say "0 bytes lost". Author: Tom Lane Discussion: https://postgr.es/m/285483.1746756246@sss.pgh.pa.us --- src/backend/partitioning/partbounds.c | 57 ++++++++++++++------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 4bdc2941efb21..822cf4ec451a4 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -1007,9 +1007,6 @@ partition_bounds_copy(PartitionBoundInfo src, int ndatums; int nindexes; int partnatts; - bool hash_part; - int natts; - Datum *boundDatums; dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData)); @@ -1023,7 +1020,7 @@ partition_bounds_copy(PartitionBoundInfo src, dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums); - if (src->kind != NULL) + if (src->kind != NULL && ndatums > 0) { PartitionRangeDatumKind *boundKinds; @@ -1058,36 +1055,40 @@ partition_bounds_copy(PartitionBoundInfo src, * For hash partitioning, datums array will have two elements - modulus * and remainder. */ - hash_part = (key->strategy == PARTITION_STRATEGY_HASH); - natts = hash_part ? 2 : partnatts; - boundDatums = palloc(ndatums * natts * sizeof(Datum)); - - for (i = 0; i < ndatums; i++) + if (ndatums > 0) { - int j; - - dest->datums[i] = &boundDatums[i * natts]; + bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH); + int natts = hash_part ? 
2 : partnatts;
+		Datum	   *boundDatums = palloc(ndatums * natts * sizeof(Datum));
 
-		for (j = 0; j < natts; j++)
+		for (i = 0; i < ndatums; i++)
 		{
-			bool		byval;
-			int			typlen;
+			int			j;
 
-			if (hash_part)
-			{
-				typlen = sizeof(int32); /* Always int4 */
-				byval = true;	/* int4 is pass-by-value */
-			}
-			else
+			dest->datums[i] = &boundDatums[i * natts];
+
+			for (j = 0; j < natts; j++)
 			{
-				byval = key->parttypbyval[j];
-				typlen = key->parttyplen[j];
-			}
+				if (dest->kind == NULL ||
+					dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
+				{
+					bool		byval;
+					int			typlen;
 
-			if (dest->kind == NULL ||
-				dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
-				dest->datums[i][j] = datumCopy(src->datums[i][j],
-											   byval, typlen);
+					if (hash_part)
+					{
+						typlen = sizeof(int32); /* Always int4 */
+						byval = true;	/* int4 is pass-by-value */
+					}
+					else
+					{
+						byval = key->parttypbyval[j];
+						typlen = key->parttyplen[j];
+					}
+					dest->datums[i][j] = datumCopy(src->datums[i][j],
+												   byval, typlen);
+				}
+			}
 		}
 	}

From 5c8eda1f72a2b0a8c48ada9b872eb5ef581f7c81 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 3 Aug 2025 13:01:17 -0400
Subject: [PATCH 67/67] Take a little more care in set_backtrace().

Coverity complained that the "errtrace" string is leaked if we return
early because backtrace_symbols fails.  Another criticism that could be
leveled at this is that not providing any hint of what happened is
user-unfriendly.  Fix that.

The odds of a leak here are small, and typically it wouldn't matter
anyway since the leak will be in ErrorContext which will soon get
reset.  So I'm not feeling a need to back-patch.
---
 src/backend/utils/error/elog.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 47af743990fe9..afce1a8e1f003 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -1128,12 +1128,15 @@ set_backtrace(ErrorData *edata, int num_skip)
 		nframes = backtrace(buf, lengthof(buf));
 		strfrms = backtrace_symbols(buf, nframes);
-		if (strfrms == NULL)
-			return;
-
-		for (int i = num_skip; i < nframes; i++)
-			appendStringInfo(&errtrace, "\n%s", strfrms[i]);
-		free(strfrms);
+		if (strfrms != NULL)
+		{
+			for (int i = num_skip; i < nframes; i++)
+				appendStringInfo(&errtrace, "\n%s", strfrms[i]);
+			free(strfrms);
+		}
+		else
+			appendStringInfoString(&errtrace,
+								   "insufficient memory for backtrace generation");
 	}
 #else
 	appendStringInfoString(&errtrace,
 						   "backtrace generation is not supported by this installation");
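
As a usage note for the suppression rules added in this series: they take
effect when the server is run under Valgrind with the updated suppression
file, assuming a build configured with USE_VALGRIND.  One plausible
invocation (options abbreviated; adjust paths to taste) is roughly:

	valgrind --quiet --trace-children=yes --leak-check=full \
		--suppressions=src/tools/valgrind.supp \
		postgres -D $PGDATA

Per the notes above, with these patches in place the src/pl/plpgsql tests
come out leak-free under such a run.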