Skip to content

Commit 39b0369

Browse files
committed
Log long wait time on recovery conflict when it's resolved.
This is a follow-up of the work done in commit 0650ff2. This commit extends log_recovery_conflict_waits so that a log message is produced also when recovery conflict has already been resolved after deadlock_timeout passes, i.e., when the startup process finishes waiting for recovery conflict after deadlock_timeout. This is useful in investigating how long recovery conflicts prevented the recovery from applying WAL. Author: Fujii Masao Reviewed-by: Kyotaro Horiguchi, Bertrand Drouvot Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
1 parent 6ecaaf8 commit 39b0369

File tree

5 files changed

+61
-15
lines changed

5 files changed

+61
-15
lines changed

doc/src/sgml/config.sgml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6961,7 +6961,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
69616961
<listitem>
69626962
<para>
69636963
Controls whether a log message is produced when the startup process
6964-
is waiting longer than <varname>deadlock_timeout</varname>
6964+
waits longer than <varname>deadlock_timeout</varname>
69656965
for recovery conflicts. This is useful in determining if recovery
69666966
conflicts prevent the recovery from applying WAL.
69676967
</para>

src/backend/storage/buffer/bufmgr.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4034,6 +4034,16 @@ LockBufferForCleanup(Buffer buffer)
40344034
/* Successfully acquired exclusive lock with pincount 1 */
40354035
UnlockBufHdr(bufHdr, buf_state);
40364036

4037+
/*
4038+
* Emit the log message if recovery conflict on buffer pin was
4039+
* resolved but the startup process waited longer than
4040+
* deadlock_timeout for it.
4041+
*/
4042+
if (logged_recovery_conflict)
4043+
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4044+
waitStart, GetCurrentTimestamp(),
4045+
NULL, false);
4046+
40374047
/* Report change to non-waiting status */
40384048
if (new_status)
40394049
{
@@ -4088,7 +4098,7 @@ LockBufferForCleanup(Buffer buffer)
40884098
DeadlockTimeout))
40894099
{
40904100
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
4091-
waitStart, now, NULL);
4101+
waitStart, now, NULL, true);
40924102
logged_recovery_conflict = true;
40934103
}
40944104
}

src/backend/storage/ipc/standby.c

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -226,18 +226,27 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
226226
* wait_start is the timestamp when the caller started to wait.
227227
* now is the timestamp when this function has been called.
228228
* wait_list is the list of virtual transaction ids assigned to
229-
* conflicting processes.
229+
* conflicting processes. still_waiting indicates whether
230+
* the startup process is still waiting for the recovery conflict
231+
* to be resolved or not.
230232
*/
231233
void
232234
LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
233-
TimestampTz now, VirtualTransactionId *wait_list)
235+
TimestampTz now, VirtualTransactionId *wait_list,
236+
bool still_waiting)
234237
{
235238
long secs;
236239
int usecs;
237240
long msecs;
238241
StringInfoData buf;
239242
int nprocs = 0;
240243

244+
/*
245+
* There must be no conflicting processes when the recovery conflict has
246+
* already been resolved.
247+
*/
248+
Assert(still_waiting || wait_list == NULL);
249+
241250
TimestampDifference(wait_start, now, &secs, &usecs);
242251
msecs = secs * 1000 + usecs / 1000;
243252
usecs = usecs % 1000;
@@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
275284
* conflicting backends in a detail message. Note that if all the backends
276285
* in the list are not active, no detail message is logged.
277286
*/
278-
ereport(LOG,
279-
errmsg("recovery still waiting after %ld.%03d ms: %s",
280-
msecs, usecs, _(get_recovery_conflict_desc(reason))),
281-
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
282-
"Conflicting processes: %s.",
283-
nprocs, buf.data) : 0);
287+
if (still_waiting)
288+
{
289+
ereport(LOG,
290+
errmsg("recovery still waiting after %ld.%03d ms: %s",
291+
msecs, usecs, _(get_recovery_conflict_desc(reason))),
292+
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
293+
"Conflicting processes: %s.",
294+
nprocs, buf.data) : 0);
295+
}
296+
else
297+
{
298+
ereport(LOG,
299+
errmsg("recovery finished waiting after %ld.%03d ms: %s",
300+
msecs, usecs, _(get_recovery_conflict_desc(reason))));
301+
}
284302

285303
if (nprocs > 0)
286304
pfree(buf.data);
@@ -375,13 +393,12 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
375393

376394
/*
377395
* Emit the log message if the startup process is waiting
378-
* longer than deadlock_timeout for recovery conflict on
379-
* buffer pin.
396+
* longer than deadlock_timeout for recovery conflict.
380397
*/
381398
if (maybe_log_conflict &&
382399
TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
383400
{
384-
LogRecoveryConflict(reason, waitStart, now, waitlist);
401+
LogRecoveryConflict(reason, waitStart, now, waitlist, true);
385402
logged_recovery_conflict = true;
386403
}
387404
}
@@ -391,6 +408,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
391408
waitlist++;
392409
}
393410

411+
/*
412+
* Emit the log message if recovery conflict was resolved but the startup
413+
* process waited longer than deadlock_timeout for it.
414+
*/
415+
if (logged_recovery_conflict)
416+
LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
417+
NULL, false);
418+
394419
/* Reset ps display if we changed it */
395420
if (new_status)
396421
{

src/backend/storage/lmgr/proc.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1322,7 +1322,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
13221322
* longer than deadlock_timeout.
13231323
*/
13241324
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
1325-
standbyWaitStart, now, cnt > 0 ? vxids : NULL);
1325+
standbyWaitStart, now,
1326+
cnt > 0 ? vxids : NULL, true);
13261327
logged_recovery_conflict = true;
13271328
}
13281329
}
@@ -1607,6 +1608,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
16071608
disable_timeout(DEADLOCK_TIMEOUT, false);
16081609
}
16091610

1611+
/*
1612+
* Emit the log message if recovery conflict on lock was resolved but the
1613+
* startup process waited longer than deadlock_timeout for it.
1614+
*/
1615+
if (InHotStandby && logged_recovery_conflict)
1616+
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
1617+
standbyWaitStart, GetCurrentTimestamp(),
1618+
NULL, false);
1619+
16101620
/*
16111621
* Re-acquire the lock table's partition lock. We have to do this to hold
16121622
* off cancel/die interrupts before we can mess with lockAwaited (else we

src/include/storage/standby.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ extern void StandbyDeadLockHandler(void);
4040
extern void StandbyTimeoutHandler(void);
4141
extern void StandbyLockTimeoutHandler(void);
4242
extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
43-
TimestampTz cur_ts, VirtualTransactionId *wait_list);
43+
TimestampTz cur_ts, VirtualTransactionId *wait_list,
44+
bool still_waiting);
4445

4546
/*
4647
* Standby Rmgr (RM_STANDBY_ID)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy