Skip to content

Commit b95a720

Browse files
Re-enable max_standby_delay = -1 using deadlock detection on startup
process. If startup waits on a buffer pin we send a request to all backends to cancel themselves if they are holding the buffer pin required and they are also waiting on a lock. If not, startup waits until max_standby_delay before cancelling any backend waiting for the requested buffer pin.
1 parent fafa374 commit b95a720

File tree

8 files changed

+86
-26
lines changed

8 files changed

+86
-26
lines changed

src/backend/storage/ipc/procsignal.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.4 2010/01/23 16:37:12 sriggs Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.5 2010/02/13 01:32:19 sriggs Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -272,6 +272,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
272272
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
273273
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
274274

275+
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
276+
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
277+
275278
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
276279
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
277280

src/backend/storage/ipc/standby.c

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1994, Regents of the University of California
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.11 2010/02/11 19:35:22 sriggs Exp $
14+
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.12 2010/02/13 01:32:19 sriggs Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -127,6 +127,9 @@ WaitExceedsMaxStandbyDelay(void)
127127
long delay_secs;
128128
int delay_usecs;
129129

130+
if (MaxStandbyDelay == -1)
131+
return false;
132+
130133
/* Are we past max_standby_delay? */
131134
TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(),
132135
&delay_secs, &delay_usecs);
@@ -351,15 +354,15 @@ ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
351354
* they hold one of the buffer pins that is blocking Startup process. If so,
352355
* backends will take an appropriate error action, ERROR or FATAL.
353356
*
354-
* A secondary purpose of this is to avoid deadlocks that might occur between
355-
* the Startup process and lock waiters. Deadlocks occur because if queries
357+
* We also check for deadlocks before we wait, though applications that cause
358+
* these will be extremely rare. Deadlocks occur because if queries
356359
* wait on a lock, that must be behind an AccessExclusiveLock, which can only
357-
* be clared if the Startup process replays a transaction completion record.
358-
* If Startup process is waiting then that is a deadlock. If we allowed a
359-
* setting of max_standby_delay that meant "wait forever" we would then need
360-
* special code to protect against deadlock. Such deadlocks are rare, so the
361-
* code would be almost certainly buggy, so we avoid both long waits and
362-
* deadlocks using the same mechanism.
360+
* be cleared if the Startup process replays a transaction completion record.
361+
* If Startup process is also waiting then that is a deadlock. The deadlock
362+
* can occur if the query is waiting and then the Startup sleeps, or if
363+
* Startup is sleeping and the the query waits on a lock. We protect against
364+
* only the former sequence here, the latter sequence is checked prior to
365+
* the query sleeping, in CheckRecoveryConflictDeadlock().
363366
*/
364367
void
365368
ResolveRecoveryConflictWithBufferPin(void)
@@ -368,11 +371,23 @@ ResolveRecoveryConflictWithBufferPin(void)
368371

369372
Assert(InHotStandby);
370373

371-
/*
372-
* Signal immediately or set alarm for later.
373-
*/
374374
if (MaxStandbyDelay == 0)
375-
SendRecoveryConflictWithBufferPin();
375+
{
376+
/*
377+
* We don't want to wait, so just tell everybody holding the pin to
378+
* get out of town.
379+
*/
380+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
381+
}
382+
else if (MaxStandbyDelay == -1)
383+
{
384+
/*
385+
* Send out a request to check for buffer pin deadlocks before we wait.
386+
* This is fairly cheap, so no need to wait for deadlock timeout before
387+
* trying to send it out.
388+
*/
389+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
390+
}
376391
else
377392
{
378393
TimestampTz now;
@@ -386,13 +401,25 @@ ResolveRecoveryConflictWithBufferPin(void)
386401
&standby_delay_secs, &standby_delay_usecs);
387402

388403
if (standby_delay_secs >= MaxStandbyDelay)
389-
SendRecoveryConflictWithBufferPin();
404+
{
405+
/*
406+
* We're already behind, so clear a path as quickly as possible.
407+
*/
408+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
409+
}
390410
else
391411
{
392412
TimestampTz fin_time; /* Expected wake-up time by timer */
393413
long timer_delay_secs; /* Amount of time we set timer for */
394414
int timer_delay_usecs = 0;
395415

416+
/*
417+
* Send out a request to check for buffer pin deadlocks before we wait.
418+
* This is fairly cheap, so no need to wait for deadlock timeout before
419+
* trying to send it out.
420+
*/
421+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
422+
396423
/*
397424
* How much longer we should wait?
398425
*/
@@ -435,15 +462,18 @@ ResolveRecoveryConflictWithBufferPin(void)
435462
}
436463

437464
void
438-
SendRecoveryConflictWithBufferPin(void)
465+
SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
439466
{
467+
Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
468+
reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
469+
440470
/*
441471
* We send signal to all backends to ask them if they are holding
442472
* the buffer pin which is delaying the Startup process. We must
443473
* not set the conflict flag yet, since most backends will be innocent.
444474
* Let the SIGUSR1 handling in each backend decide their own fate.
445475
*/
446-
CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
476+
CancelDBBackends(InvalidOid, reason, false);
447477
}
448478

449479
/*

src/backend/storage/lmgr/proc.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.215 2010/02/08 04:33:54 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.216 2010/02/13 01:32:19 sriggs Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -45,6 +45,7 @@
4545
#include "storage/pmsignal.h"
4646
#include "storage/proc.h"
4747
#include "storage/procarray.h"
48+
#include "storage/procsignal.h"
4849
#include "storage/spin.h"
4950

5051

@@ -556,6 +557,15 @@ HaveNFreeProcs(int n)
556557
return (n <= 0);
557558
}
558559

560+
bool
561+
IsWaitingForLock(void)
562+
{
563+
if (lockAwaited == NULL)
564+
return false;
565+
566+
return true;
567+
}
568+
559569
/*
560570
* Cancel any pending wait for lock, when aborting a transaction.
561571
*
@@ -1670,7 +1680,7 @@ CheckStandbyTimeout(void)
16701680
now = GetCurrentTimestamp();
16711681

16721682
if (now >= statement_fin_time)
1673-
SendRecoveryConflictWithBufferPin();
1683+
SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
16741684
else
16751685
{
16761686
/* Not time yet, so (re)schedule the interrupt */

src/backend/tcop/postgres.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.587 2010/01/23 17:04:05 sriggs Exp $
11+
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.588 2010/02/13 01:32:19 sriggs Exp $
1212
*
1313
* NOTES
1414
* this is the "main" module of the postgres backend and
@@ -2278,6 +2278,9 @@ errdetail_recovery_conflict(void)
22782278
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
22792279
errdetail("User query might have needed to see row versions that must be removed.");
22802280
break;
2281+
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
2282+
errdetail("User transaction caused buffer deadlock with recovery.");
2283+
break;
22812284
case PROCSIG_RECOVERY_CONFLICT_DATABASE:
22822285
errdetail("User was connected to a database that must be dropped.");
22832286
break;
@@ -2754,6 +2757,15 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
27542757
RecoveryConflictReason = reason;
27552758
switch (reason)
27562759
{
2760+
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
2761+
/*
2762+
* If we aren't waiting for a lock we can never deadlock.
2763+
*/
2764+
if (!IsWaitingForLock())
2765+
return;
2766+
2767+
/* Intentional drop through to check wait for pin */
2768+
27572769
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
27582770
/*
27592771
* If we aren't blocking the Startup process there is
@@ -2819,6 +2831,8 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
28192831
elog(FATAL, "Unknown conflict mode");
28202832
}
28212833

2834+
Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
2835+
28222836
/*
28232837
* If it's safe to interrupt, and we're waiting for input or a lock,
28242838
* service the interrupt immediately

src/backend/utils/misc/guc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Written by Peter Eisentraut <peter_e@gmx.net>.
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.538 2010/02/01 13:40:28 sriggs Exp $
13+
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.539 2010/02/13 01:32:20 sriggs Exp $
1414
*
1515
*--------------------------------------------------------------------
1616
*/
@@ -1381,7 +1381,7 @@ static struct config_int ConfigureNamesInt[] =
13811381
NULL
13821382
},
13831383
&MaxStandbyDelay,
1384-
30, 0, INT_MAX, NULL, NULL
1384+
30, -1, INT_MAX, NULL, NULL
13851385
},
13861386

13871387
{

src/include/storage/proc.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.119 2010/01/23 16:37:12 sriggs Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.120 2010/02/13 01:32:20 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -189,6 +189,7 @@ extern void ProcQueueInit(PROC_QUEUE *queue);
189189
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
190190
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
191191
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
192+
extern bool IsWaitingForLock(void);
192193
extern void LockWaitCancel(void);
193194

194195
extern void ProcWaitForSignal(void);

src/include/storage/procsignal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.4 2010/01/23 16:37:12 sriggs Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.5 2010/02/13 01:32:20 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -38,6 +38,7 @@ typedef enum
3838
PROCSIG_RECOVERY_CONFLICT_LOCK,
3939
PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
4040
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
41+
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
4142

4243
NUM_PROCSIGNALS /* Must be last! */
4344
} ProcSignalReason;

src/include/storage/standby.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.7 2010/01/31 19:01:11 sriggs Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.8 2010/02/13 01:32:20 sriggs Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -16,6 +16,7 @@
1616

1717
#include "access/xlog.h"
1818
#include "storage/lock.h"
19+
#include "storage/procsignal.h"
1920
#include "storage/relfilenode.h"
2021

2122
extern int vacuum_defer_cleanup_age;
@@ -30,7 +31,7 @@ extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
3031
extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
3132

3233
extern void ResolveRecoveryConflictWithBufferPin(void);
33-
extern void SendRecoveryConflictWithBufferPin(void);
34+
extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
3435
extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
3536

3637
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy