Skip to content

Commit 53c5b86

Browse files
committed
Tighten up error recovery for fast-path locking.
The previous code could cause a backend crash after the sequence BEGIN; SAVEPOINT a; LOCK TABLE foo (interrupted by ^C or statement timeout); ROLLBACK TO SAVEPOINT a; LOCK TABLE foo; and might have leaked strong-lock counts in other situations. Reported by Zoltán Böszörményi; patch reviewed by Jeff Davis.
1 parent ab77b2d commit 53c5b86

File tree

7 files changed

+94
-31
lines changed

7 files changed

+94
-31
lines changed

src/backend/access/transam/xact.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2259,7 +2259,7 @@ AbortTransaction(void)
22592259
* Also clean up any open wait for lock, since the lock manager will choke
22602260
* if we try to wait for another lock before doing this.
22612261
*/
2262-
LockWaitCancel();
2262+
LockErrorCleanup();
22632263

22642264
/*
22652265
* check the current transaction state
@@ -4144,7 +4144,7 @@ AbortSubTransaction(void)
41444144
AbortBufferIO();
41454145
UnlockBuffers();
41464146

4147-
LockWaitCancel();
4147+
LockErrorCleanup();
41484148

41494149
/*
41504150
* check the current transaction state

src/backend/storage/lmgr/README

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ examines every member of the wait queue (this was not true in the 7.0
560560
implementation, BTW). Therefore, if ProcLockWakeup is always invoked
561561
after a lock is released or a wait queue is rearranged, there can be no
562562
failure to wake a wakable process. One should also note that
563-
LockWaitCancel (abort a waiter due to outside factors) must run
563+
LockErrorCleanup (abort a waiter due to outside factors) must run
564564
ProcLockWakeup, in case the canceled waiter was soft-blocking other
565565
waiters.
566566

src/backend/storage/lmgr/lock.c

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ static HTAB *LockMethodProcLockHash;
250250
static HTAB *LockMethodLocalHash;
251251

252252

253-
/* private state for GrantAwaitedLock */
253+
/* private state for error cleanup */
254+
static LOCALLOCK *StrongLockInProgress;
254255
static LOCALLOCK *awaitedLock;
255256
static ResourceOwner awaitedOwner;
256257

@@ -338,6 +339,8 @@ static void RemoveLocalLock(LOCALLOCK *locallock);
338339
static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
339340
const LOCKTAG *locktag, uint32 hashcode, LOCKMODE lockmode);
340341
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
342+
static void BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode);
343+
static void FinishStrongLockAcquire(void);
341344
static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
342345
static void ReleaseLockForOwner(LOCALLOCK *locallock, ResourceOwner owner);
343346
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
@@ -738,22 +741,11 @@ LockAcquireExtended(const LOCKTAG *locktag,
738741
}
739742
else if (FastPathStrongMode(lockmode))
740743
{
741-
/*
742-
* Adding to a memory location is not atomic, so we take a
743-
* spinlock to ensure we don't collide with someone else trying
744-
* to bump the count at the same time.
745-
*
746-
* XXX: It might be worth considering using an atomic fetch-and-add
747-
* instruction here, on architectures where that is supported.
748-
*/
749-
Assert(locallock->holdsStrongLockCount == FALSE);
750-
SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
751-
FastPathStrongRelationLocks->count[fasthashcode]++;
752-
locallock->holdsStrongLockCount = TRUE;
753-
SpinLockRelease(&FastPathStrongRelationLocks->mutex);
744+
BeginStrongLockAcquire(locallock, fasthashcode);
754745
if (!FastPathTransferRelationLocks(lockMethodTable, locktag,
755746
hashcode))
756747
{
748+
AbortStrongLockAcquire();
757749
if (reportMemoryError)
758750
ereport(ERROR,
759751
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -779,6 +771,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
779771
hashcode, lockmode);
780772
if (!proclock)
781773
{
774+
AbortStrongLockAcquire();
782775
LWLockRelease(partitionLock);
783776
if (reportMemoryError)
784777
ereport(ERROR,
@@ -820,6 +813,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
820813
*/
821814
if (dontWait)
822815
{
816+
AbortStrongLockAcquire();
823817
if (proclock->holdMask == 0)
824818
{
825819
uint32 proclock_hashcode;
@@ -884,6 +878,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
884878
*/
885879
if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
886880
{
881+
AbortStrongLockAcquire();
887882
PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
888883
LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
889884
/* Should we retry ? */
@@ -894,6 +889,12 @@ LockAcquireExtended(const LOCKTAG *locktag,
894889
LOCK_PRINT("LockAcquire: granted", lock, lockmode);
895890
}
896891

892+
/*
893+
* Lock state is fully up-to-date now; if we error out after this, no
894+
* special error cleanup is required.
895+
*/
896+
FinishStrongLockAcquire();
897+
897898
LWLockRelease(partitionLock);
898899

899900
/*
@@ -1349,6 +1350,64 @@ GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner)
13491350
locallock->numLockOwners++;
13501351
}
13511352

1353+
/*
1354+
* BeginStrongLockAcquire - inhibit use of fastpath for a given LOCALLOCK,
1355+
* and arrange for error cleanup if it fails
1356+
*/
1357+
static void
1358+
BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode)
1359+
{
1360+
Assert(StrongLockInProgress == NULL);
1361+
Assert(locallock->holdsStrongLockCount == FALSE);
1362+
1363+
/*
1364+
* Adding to a memory location is not atomic, so we take a
1365+
* spinlock to ensure we don't collide with someone else trying
1366+
* to bump the count at the same time.
1367+
*
1368+
* XXX: It might be worth considering using an atomic fetch-and-add
1369+
* instruction here, on architectures where that is supported.
1370+
*/
1371+
1372+
SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
1373+
FastPathStrongRelationLocks->count[fasthashcode]++;
1374+
locallock->holdsStrongLockCount = TRUE;
1375+
StrongLockInProgress = locallock;
1376+
SpinLockRelease(&FastPathStrongRelationLocks->mutex);
1377+
}
1378+
1379+
/*
1380+
* FinishStrongLockAcquire - cancel pending cleanup for a strong lock
1381+
* acquisition once it's no longer needed
1382+
*/
1383+
static void
1384+
FinishStrongLockAcquire(void)
1385+
{
1386+
StrongLockInProgress = NULL;
1387+
}
1388+
1389+
/*
1390+
* AbortStrongLockAcquire - undo strong lock state changes performed by
1391+
* BeginStrongLockAcquire.
1392+
*/
1393+
void
1394+
AbortStrongLockAcquire(void)
1395+
{
1396+
uint32 fasthashcode;
1397+
LOCALLOCK *locallock = StrongLockInProgress;
1398+
1399+
if (locallock == NULL)
1400+
return;
1401+
1402+
fasthashcode = FastPathStrongLockHashPartition(locallock->hashcode);
1403+
Assert(locallock->holdsStrongLockCount == TRUE);
1404+
SpinLockAcquire(&FastPathStrongRelationLocks->mutex);
1405+
FastPathStrongRelationLocks->count[fasthashcode]--;
1406+
locallock->holdsStrongLockCount = FALSE;
1407+
StrongLockInProgress = NULL;
1408+
SpinLockRelease(&FastPathStrongRelationLocks->mutex);
1409+
}
1410+
13521411
/*
13531412
* GrantAwaitedLock -- call GrantLockLocal for the lock we are doing
13541413
* WaitOnLock on.
@@ -1414,7 +1473,7 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
14141473
* We can and do use a PG_TRY block to try to clean up after failure, but
14151474
* this still has a major limitation: elog(FATAL) can occur while waiting
14161475
* (eg, a "die" interrupt), and then control won't come back here. So all
1417-
* cleanup of essential state should happen in LockWaitCancel, not here.
1476+
* cleanup of essential state should happen in LockErrorCleanup, not here.
14181477
* We can use PG_TRY to clear the "waiting" status flags, since doing that
14191478
* is unimportant if the process exits.
14201479
*/
@@ -1441,7 +1500,7 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
14411500
}
14421501
PG_CATCH();
14431502
{
1444-
/* In this path, awaitedLock remains set until LockWaitCancel */
1503+
/* In this path, awaitedLock remains set until LockErrorCleanup */
14451504

14461505
/* Report change to non-waiting status */
14471506
pgstat_report_waiting(false);

src/backend/storage/lmgr/proc.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -635,17 +635,20 @@ IsWaitingForLock(void)
635635
}
636636

637637
/*
638-
* Cancel any pending wait for lock, when aborting a transaction.
638+
* Cancel any pending wait for lock, when aborting a transaction, and revert
639+
* any strong lock count acquisition for a lock being acquired.
639640
*
640641
* (Normally, this would only happen if we accept a cancel/die
641-
* interrupt while waiting; but an ereport(ERROR) while waiting is
642-
* within the realm of possibility, too.)
642+
* interrupt while waiting; but an ereport(ERROR) before or during the lock
643+
* wait is within the realm of possibility, too.)
643644
*/
644645
void
645-
LockWaitCancel(void)
646+
LockErrorCleanup(void)
646647
{
647648
LWLockId partitionLock;
648649

650+
AbortStrongLockAcquire();
651+
649652
/* Nothing to do if we weren't waiting for a lock */
650653
if (lockAwaited == NULL)
651654
return;
@@ -709,7 +712,7 @@ ProcReleaseLocks(bool isCommit)
709712
if (!MyProc)
710713
return;
711714
/* If waiting, get off wait queue (should only be needed after error) */
712-
LockWaitCancel();
715+
LockErrorCleanup();
713716
/* Release locks */
714717
LockReleaseAll(DEFAULT_LOCKMETHOD, !isCommit);
715718

@@ -1019,7 +1022,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
10191022
* NOTE: this may also cause us to exit critical-section state, possibly
10201023
* allowing a cancel/die interrupt to be accepted. This is OK because we
10211024
* have recorded the fact that we are waiting for a lock, and so
1022-
* LockWaitCancel will clean up if cancel/die happens.
1025+
* LockErrorCleanup will clean up if cancel/die happens.
10231026
*/
10241027
LWLockRelease(partitionLock);
10251028

@@ -1062,7 +1065,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
10621065
* don't, because we have no shared-state-change work to do after being
10631066
* granted the lock (the grantor did it all). We do have to worry about
10641067
* updating the locallock table, but if we lose control to an error,
1065-
* LockWaitCancel will fix that up.
1068+
* LockErrorCleanup will fix that up.
10661069
*/
10671070
do
10681071
{
@@ -1207,7 +1210,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
12071210
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
12081211

12091212
/*
1210-
* We no longer want LockWaitCancel to do anything.
1213+
* We no longer want LockErrorCleanup to do anything.
12111214
*/
12121215
lockAwaited = NULL;
12131216

src/backend/tcop/postgres.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2575,7 +2575,7 @@ die(SIGNAL_ARGS)
25752575
/* bump holdoff count to make ProcessInterrupts() a no-op */
25762576
/* until we are done getting ready for it */
25772577
InterruptHoldoffCount++;
2578-
LockWaitCancel(); /* prevent CheckDeadLock from running */
2578+
LockErrorCleanup(); /* prevent CheckDeadLock from running */
25792579
DisableNotifyInterrupt();
25802580
DisableCatchupInterrupt();
25812581
InterruptHoldoffCount--;
@@ -2617,7 +2617,7 @@ StatementCancelHandler(SIGNAL_ARGS)
26172617
/* bump holdoff count to make ProcessInterrupts() a no-op */
26182618
/* until we are done getting ready for it */
26192619
InterruptHoldoffCount++;
2620-
LockWaitCancel(); /* prevent CheckDeadLock from running */
2620+
LockErrorCleanup(); /* prevent CheckDeadLock from running */
26212621
DisableNotifyInterrupt();
26222622
DisableCatchupInterrupt();
26232623
InterruptHoldoffCount--;
@@ -2776,7 +2776,7 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
27762776
/* bump holdoff count to make ProcessInterrupts() a no-op */
27772777
/* until we are done getting ready for it */
27782778
InterruptHoldoffCount++;
2779-
LockWaitCancel(); /* prevent CheckDeadLock from running */
2779+
LockErrorCleanup(); /* prevent CheckDeadLock from running */
27802780
DisableNotifyInterrupt();
27812781
DisableCatchupInterrupt();
27822782
InterruptHoldoffCount--;

src/include/storage/lock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag,
489489
bool sessionLock,
490490
bool dontWait,
491491
bool report_memory_error);
492+
extern void AbortStrongLockAcquire(void);
492493
extern bool LockRelease(const LOCKTAG *locktag,
493494
LOCKMODE lockmode, bool sessionLock);
494495
extern void LockReleaseSession(LOCKMETHODID lockmethodid);

src/include/storage/proc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
244244
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
245245
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
246246
extern bool IsWaitingForLock(void);
247-
extern void LockWaitCancel(void);
247+
extern void LockErrorCleanup(void);
248248

249249
extern void ProcWaitForSignal(void);
250250
extern void ProcSendSignal(int pid);

0 commit comments

Comments (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy