Skip to content

Commit 0650ff2

Browse files
committed
Add GUC to log long wait times on recovery conflicts.
This commit adds GUC log_recovery_conflict_waits that controls whether a log message is produced when the startup process is waiting longer than deadlock_timeout for recovery conflicts. This is useful in determining if recovery conflicts prevent the recovery from applying WAL. Note that currently a log message is produced only when recovery conflict has not been resolved yet even after deadlock_timeout passes, i.e., only when the startup process is still waiting for recovery conflict even after deadlock_timeout. Author: Bertrand Drouvot, Masahiko Sawada Reviewed-by: Alvaro Herrera, Kyotaro Horiguchi, Fujii Masao Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
1 parent f7a1a80 commit 0650ff2

File tree

8 files changed

+292
-30
lines changed

8 files changed

+292
-30
lines changed

doc/src/sgml/config.sgml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6952,6 +6952,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
69526952
</listitem>
69536953
</varlistentry>
69546954

6955+
<varlistentry id="guc-log-recovery-conflict-waits" xreflabel="log_recovery_conflict_waits">
6956+
<term><varname>log_recovery_conflict_waits</varname> (<type>boolean</type>)
6957+
<indexterm>
6958+
<primary><varname>log_recovery_conflict_waits</varname> configuration parameter</primary>
6959+
</indexterm>
6960+
</term>
6961+
<listitem>
6962+
<para>
6963+
Controls whether a log message is produced when the startup process
6964+
is waiting longer than <varname>deadlock_timeout</varname>
6965+
for recovery conflicts. This is useful in determining if recovery
6966+
conflicts prevent the recovery from applying WAL.
6967+
</para>
6968+
6969+
<para>
6970+
The default is <literal>off</literal>. This parameter can only be set
6971+
in the <filename>postgresql.conf</filename> file or on the server
6972+
command line.
6973+
</para>
6974+
</listitem>
6975+
</varlistentry>
6976+
69556977
<varlistentry id="guc-log-parameter-max-length" xreflabel="log_parameter_max_length">
69566978
<term><varname>log_parameter_max_length</varname> (<type>integer</type>)
69576979
<indexterm>

doc/src/sgml/high-availability.sgml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2071,6 +2071,12 @@ if (!triggered)
20712071
server. The <structname>pg_stat_database</structname> system view also contains
20722072
summary information.
20732073
</para>
2074+
2075+
<para>
2076+
Users can control whether a log message is produced when WAL replay is waiting
2077+
longer than <varname>deadlock_timeout</varname> for conflicts. This
2078+
is controlled by the <xref linkend="guc-log-recovery-conflict-waits"/> parameter.
2079+
</para>
20742080
</sect2>
20752081

20762082
<sect2 id="hot-standby-admin">

src/backend/storage/buffer/bufmgr.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3809,6 +3809,8 @@ LockBufferForCleanup(Buffer buffer)
38093809
{
38103810
BufferDesc *bufHdr;
38113811
char *new_status = NULL;
3812+
TimestampTz waitStart = 0;
3813+
bool logged_recovery_conflict = false;
38123814

38133815
Assert(BufferIsPinned(buffer));
38143816
Assert(PinCountWaitBuf == NULL);
@@ -3882,6 +3884,34 @@ LockBufferForCleanup(Buffer buffer)
38823884
new_status[len] = '\0'; /* truncate off " waiting" */
38833885
}
38843886

3887+
/*
3888+
* Emit the log message if the startup process is waiting longer
3889+
* than deadlock_timeout for recovery conflict on buffer pin.
3890+
*
3891+
* Skip this if first time through because the startup process has
3892+
* not started waiting yet in this case. So, the wait start
3893+
* timestamp is set after this logic.
3894+
*/
3895+
if (waitStart != 0 && !logged_recovery_conflict)
3896+
{
3897+
TimestampTz now = GetCurrentTimestamp();
3898+
3899+
if (TimestampDifferenceExceeds(waitStart, now,
3900+
DeadlockTimeout))
3901+
{
3902+
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
3903+
waitStart, now, NULL);
3904+
logged_recovery_conflict = true;
3905+
}
3906+
}
3907+
3908+
/*
3909+
* Set the wait start timestamp if logging is enabled and first
3910+
* time through.
3911+
*/
3912+
if (log_recovery_conflict_waits && waitStart == 0)
3913+
waitStart = GetCurrentTimestamp();
3914+
38853915
/* Publish the bufid that Startup process waits on */
38863916
SetStartupBufferPinWaitBufId(buffer - 1);
38873917
/* Set alarm and then wait to be signaled by UnpinBuffer() */

src/backend/storage/ipc/standby.c

Lines changed: 173 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
int vacuum_defer_cleanup_age;
4040
int max_standby_archive_delay = 30 * 1000;
4141
int max_standby_streaming_delay = 30 * 1000;
42+
bool log_recovery_conflict_waits = false;
4243

4344
static HTAB *RecoveryLockLists;
4445

@@ -53,6 +54,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis
5354
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
5455
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
5556
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
57+
static const char *get_recovery_conflict_desc(ProcSignalReason reason);
5658

5759
/*
5860
* Keep track of all the locks owned by a given transaction.
@@ -218,31 +220,100 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
218220
return false;
219221
}
220222

223+
/*
224+
* Log the recovery conflict.
225+
*
226+
* wait_start is the timestamp when the caller started to wait.
227+
* now is the timestamp when this function has been called.
228+
* wait_list is the list of virtual transaction ids assigned to
229+
* conflicting processes.
230+
*/
231+
void
232+
LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
233+
TimestampTz now, VirtualTransactionId *wait_list)
234+
{
235+
long secs;
236+
int usecs;
237+
long msecs;
238+
StringInfoData buf;
239+
int nprocs = 0;
240+
241+
TimestampDifference(wait_start, now, &secs, &usecs);
242+
msecs = secs * 1000 + usecs / 1000;
243+
usecs = usecs % 1000;
244+
245+
if (wait_list)
246+
{
247+
VirtualTransactionId *vxids;
248+
249+
/* Construct a string of list of the conflicting processes */
250+
vxids = wait_list;
251+
while (VirtualTransactionIdIsValid(*vxids))
252+
{
253+
PGPROC *proc = BackendIdGetProc(vxids->backendId);
254+
255+
/* proc can be NULL if the target backend is not active */
256+
if (proc)
257+
{
258+
if (nprocs == 0)
259+
{
260+
initStringInfo(&buf);
261+
appendStringInfo(&buf, "%d", proc->pid);
262+
}
263+
else
264+
appendStringInfo(&buf, ", %d", proc->pid);
265+
266+
nprocs++;
267+
}
268+
269+
vxids++;
270+
}
271+
}
272+
273+
/*
274+
* If wait_list is specified, report the list of PIDs of active
275+
* conflicting backends in a detail message. Note that if none of the
276+
* backends in the list is active, no detail message is logged.
277+
*/
278+
ereport(LOG,
279+
errmsg("recovery still waiting after %ld.%03d ms: %s",
280+
msecs, usecs, _(get_recovery_conflict_desc(reason))),
281+
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
282+
"Conflicting processes: %s.",
283+
nprocs, buf.data) : 0);
284+
285+
if (nprocs > 0)
286+
pfree(buf.data);
287+
}
288+
221289
/*
222290
* This is the main executioner for any query backend that conflicts with
223291
* recovery processing. Judgement has already been passed on it within
224292
* a specific rmgr. Here we just issue the orders to the procs. The procs
225293
* then throw the required error as instructed.
226294
*
227-
* If report_waiting is true, "waiting" is reported in PS display if necessary.
228-
* If the caller has already reported that, report_waiting should be false.
229-
* Otherwise, "waiting" is reported twice unexpectedly.
295+
* If report_waiting is true, "waiting" is reported in PS display and the
296+
* wait for recovery conflict is reported in the log, if necessary. If
297+
* the caller is responsible for reporting them, report_waiting should be
298+
* false. Otherwise, both the caller and this function report the same
299+
* thing unexpectedly.
230300
*/
231301
static void
232302
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
233303
ProcSignalReason reason, uint32 wait_event_info,
234304
bool report_waiting)
235305
{
236306
TimestampTz waitStart = 0;
237-
char *new_status;
307+
char *new_status = NULL;
308+
bool logged_recovery_conflict = false;
238309

239310
/* Fast exit, to avoid a kernel call if there's no work to be done. */
240311
if (!VirtualTransactionIdIsValid(*waitlist))
241312
return;
242313

243-
if (report_waiting)
314+
/* Set the wait start timestamp for reporting */
315+
if (report_waiting && (log_recovery_conflict_waits || update_process_title))
244316
waitStart = GetCurrentTimestamp();
245-
new_status = NULL; /* we haven't changed the ps display */
246317

247318
while (VirtualTransactionIdIsValid(*waitlist))
248319
{
@@ -252,25 +323,6 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
252323
/* wait until the virtual xid is gone */
253324
while (!VirtualXactLock(*waitlist, false))
254325
{
255-
/*
256-
* Report via ps if we have been waiting for more than 500 msec
257-
* (should that be configurable?)
258-
*/
259-
if (update_process_title && new_status == NULL && report_waiting &&
260-
TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
261-
500))
262-
{
263-
const char *old_status;
264-
int len;
265-
266-
old_status = get_ps_display(&len);
267-
new_status = (char *) palloc(len + 8 + 1);
268-
memcpy(new_status, old_status, len);
269-
strcpy(new_status + len, " waiting");
270-
set_ps_display(new_status);
271-
new_status[len] = '\0'; /* truncate off " waiting" */
272-
}
273-
274326
/* Is it time to kill it? */
275327
if (WaitExceedsMaxStandbyDelay(wait_event_info))
276328
{
@@ -289,6 +341,50 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
289341
if (pid != 0)
290342
pg_usleep(5000L);
291343
}
344+
345+
if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL))
346+
{
347+
TimestampTz now = 0;
348+
bool maybe_log_conflict;
349+
bool maybe_update_title;
350+
351+
maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
352+
maybe_update_title = (update_process_title && new_status == NULL);
353+
354+
/* Get the current timestamp if not report yet */
355+
if (maybe_log_conflict || maybe_update_title)
356+
now = GetCurrentTimestamp();
357+
358+
/*
359+
* Report via ps if we have been waiting for more than 500
360+
* msec (should that be configurable?)
361+
*/
362+
if (maybe_update_title &&
363+
TimestampDifferenceExceeds(waitStart, now, 500))
364+
{
365+
const char *old_status;
366+
int len;
367+
368+
old_status = get_ps_display(&len);
369+
new_status = (char *) palloc(len + 8 + 1);
370+
memcpy(new_status, old_status, len);
371+
strcpy(new_status + len, " waiting");
372+
set_ps_display(new_status);
373+
new_status[len] = '\0'; /* truncate off " waiting" */
374+
}
375+
376+
/*
377+
* Emit the log message if the startup process is waiting
378+
* longer than deadlock_timeout for the recovery conflict
379+
* identified by reason.
380+
*/
381+
if (maybe_log_conflict &&
382+
TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
383+
{
384+
LogRecoveryConflict(reason, waitStart, now, waitlist);
385+
logged_recovery_conflict = true;
386+
}
387+
}
292388
}
293389

294390
/* The virtual transaction is gone now, wait for the next one */
@@ -405,9 +501,18 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
405501
* hot-standby backend processes. If deadlock_timeout is reached in
406502
* this function, all the backends holding the conflicting locks are
407503
* requested to check themselves for deadlocks.
504+
*
505+
* logging_conflict should be true if the recovery conflict has not been
506+
* logged yet even though logging is enabled. After deadlock_timeout is
507+
* reached and the request for deadlock check is sent, we wait again to
508+
* be signaled by the release of the lock if logging_conflict is false.
509+
* Otherwise we return without waiting again so that the caller can report
510+
* the recovery conflict. In that case, this function is then called again
511+
* with logging_conflict=false (because the recovery conflict has already
512+
* been logged) and we will wait again for the lock to be released.
408513
*/
409514
void
410-
ResolveRecoveryConflictWithLock(LOCKTAG locktag)
515+
ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
411516
{
412517
TimestampTz ltime;
413518

@@ -494,6 +599,15 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
494599
backends++;
495600
}
496601

602+
/*
603+
* Exit if the recovery conflict has not been logged yet even though
604+
* logging is enabled, so that the caller can log that. Then
605+
* RecoveryConflictWithLock() is called again and we will wait again
606+
* for the lock to be released.
607+
*/
608+
if (logging_conflict)
609+
goto cleanup;
610+
497611
/*
498612
* Wait again here to be signaled by the release of the Relation Lock,
499613
* to prevent the subsequent ResolveRecoveryConflictWithLock() from causing
@@ -1209,3 +1323,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
12091323
nmsgs * sizeof(SharedInvalidationMessage));
12101324
XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
12111325
}
1326+
1327+
/* Return the description of recovery conflict */
1328+
static const char *
1329+
get_recovery_conflict_desc(ProcSignalReason reason)
1330+
{
1331+
const char *reasonDesc = gettext_noop("unknown reason");
1332+
1333+
switch (reason)
1334+
{
1335+
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
1336+
reasonDesc = gettext_noop("recovery conflict on buffer pin");
1337+
break;
1338+
case PROCSIG_RECOVERY_CONFLICT_LOCK:
1339+
reasonDesc = gettext_noop("recovery conflict on lock");
1340+
break;
1341+
case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
1342+
reasonDesc = gettext_noop("recovery conflict on tablespace");
1343+
break;
1344+
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
1345+
reasonDesc = gettext_noop("recovery conflict on snapshot");
1346+
break;
1347+
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
1348+
reasonDesc = gettext_noop("recovery conflict on buffer deadlock");
1349+
break;
1350+
case PROCSIG_RECOVERY_CONFLICT_DATABASE:
1351+
reasonDesc = gettext_noop("recovery conflict on database");
1352+
break;
1353+
default:
1354+
break;
1355+
}
1356+
1357+
return reasonDesc;
1358+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy