
Commit 008608b

Avoid the use of a separate spinlock to protect a LWLock's wait queue.
Previously we used a spinlock, in addition to the atomically manipulated ->state field, to protect the wait queue. But it's pretty simple to instead perform the locking using a flag in state.

Due to 6150a1b, BufferDescs on platforms (like PPC) with > 1 byte spinlocks increased in size beyond 64 bytes. As 64 bytes is the size we pad allocated BufferDescs to, this can increase false sharing, causing performance problems in turn. Together with the previous commit this reduces the size to <= 64 bytes on all common platforms.

Author: Andres Freund
Discussion: CAA4eK1+ZeB8PMwwktf+3bRS0Pt4Ux6Rs6Aom0uip8c6shJWmyg@mail.gmail.com
    20160327121858.zrmrjegmji2ymnvr@alap3.anarazel.de
1 parent 4835458 commit 008608b
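For readers skimming the diff below: the core trick is to move the wait-queue lock into a bit (LW_FLAG_LOCKED) of the already-atomic state word, acquired with an atomic fetch-or and released with an atomic fetch-and. Here is a minimal standalone sketch of that pattern, using C11 atomics rather than PostgreSQL's pg_atomic_* wrappers; the demo_* names are illustrative only, and the exponential backoff of perform_spin_delay() is reduced to a plain busy wait.

#include <stdatomic.h>
#include <stdint.h>

#define LW_FLAG_LOCKED	((uint32_t) 1 << 28)	/* wait-list lock bit */

typedef struct
{
	_Atomic uint32_t state;		/* flags + lock counts, as in LWLock */
} demo_lwlock;

static void
demo_wait_list_lock(demo_lwlock *lock)
{
	for (;;)
	{
		/* try to set the bit; the returned old value says whether we got it */
		uint32_t	old_state = atomic_fetch_or(&lock->state, LW_FLAG_LOCKED);

		if (!(old_state & LW_FLAG_LOCKED))
			return;				/* bit was clear before: the wait list is ours */

		/*
		 * Someone else holds it: spin on plain reads (no atomic write
		 * traffic) until the bit clears, then retry the fetch-or. Real code
		 * would back off via perform_spin_delay().
		 */
		while (atomic_load_explicit(&lock->state,
									memory_order_relaxed) & LW_FLAG_LOCKED)
			;
	}
}

static void
demo_wait_list_unlock(demo_lwlock *lock)
{
	/* clearing the bit releases the wait list; other flag bits are untouched */
	atomic_fetch_and(&lock->state, ~LW_FLAG_LOCKED);
}

Because the lock bit lives in the same word as the other flags, a release can also fold flag updates into the same atomic operation; LWLockWakeup() in the diff does exactly that with a compare-and-exchange loop.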

2 files changed: +113 additions, -82 deletions

src/backend/storage/lmgr/lwlock.c

Lines changed: 112 additions & 77 deletions
@@ -97,6 +97,7 @@ extern slock_t *ShmemLock;
 
 #define LW_FLAG_HAS_WAITERS			((uint32) 1 << 30)
 #define LW_FLAG_RELEASE_OK			((uint32) 1 << 29)
+#define LW_FLAG_LOCKED				((uint32) 1 << 28)
 
 #define LW_VAL_EXCLUSIVE			((uint32) 1 << 24)
 #define LW_VAL_SHARED				1
@@ -711,7 +712,6 @@ RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
 void
 LWLockInitialize(LWLock *lock, int tranche_id)
 {
-	SpinLockInit(&lock->mutex);
 	pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
 #ifdef LOCK_DEBUG
 	pg_atomic_init_u32(&lock->nwaiters, 0);
@@ -842,6 +842,74 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode)
 	pg_unreachable();
 }
 
+/*
+ * Lock the LWLock's wait list against concurrent activity.
+ *
+ * NB: even though the wait list is locked, non-conflicting lock operations
+ * may still happen concurrently.
+ *
+ * Time spent holding mutex should be short!
+ */
+static void
+LWLockWaitListLock(LWLock *lock)
+{
+	uint32		old_state;
+#ifdef LWLOCK_STATS
+	lwlock_stats *lwstats;
+	uint32		delays = 0;
+
+	lwstats = get_lwlock_stats_entry(lock);
+#endif
+
+	while (true)
+	{
+		/* always try once to acquire lock directly */
+		old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
+		if (!(old_state & LW_FLAG_LOCKED))
+			break;				/* got lock */
+
+		/* and then spin without atomic operations until lock is released */
+		{
+			SpinDelayStatus delayStatus = init_spin_delay(&lock->state);
+
+			while (old_state & LW_FLAG_LOCKED)
+			{
+				perform_spin_delay(&delayStatus);
+				old_state = pg_atomic_read_u32(&lock->state);
+			}
+#ifdef LWLOCK_STATS
+			delays += delayStatus.delays;
+#endif
+			finish_spin_delay(&delayStatus);
+		}
+
+		/*
+		 * Retry. The lock might obviously already be re-acquired by the time
+		 * we're attempting to get it again.
+		 */
+	}
+
+#ifdef LWLOCK_STATS
+	lwstats->spin_delay_count += delays;
+#endif
+}
+
+/*
+ * Unlock the LWLock's wait list.
+ *
+ * Note that it can be more efficient to manipulate flags and release the
+ * locks in a single atomic operation.
+ */
+static void
+LWLockWaitListUnlock(LWLock *lock)
+{
+	uint32		old_state PG_USED_FOR_ASSERTS_ONLY;
+
+	old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
+
+	Assert(old_state & LW_FLAG_LOCKED);
+}
+
 /*
  * Wakeup all the lockers that currently have a chance to acquire the lock.
  */
@@ -852,22 +920,13 @@ LWLockWakeup(LWLock *lock)
 	bool		wokeup_somebody = false;
 	dlist_head	wakeup;
 	dlist_mutable_iter iter;
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
 
 	dlist_init(&wakeup);
 
 	new_release_ok = true;
 
-	/* Acquire mutex.  Time spent holding mutex should be short! */
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	/* lock wait list while collecting backends to wake up */
+	LWLockWaitListLock(lock);
 
 	dlist_foreach_modify(iter, &lock->waiters)
 	{
@@ -904,19 +963,33 @@ LWLockWakeup(LWLock *lock)
 
 	Assert(dlist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
 
-	/* Unset both flags at once if required */
-	if (!new_release_ok && dlist_is_empty(&wakeup))
-		pg_atomic_fetch_and_u32(&lock->state,
-								~(LW_FLAG_RELEASE_OK | LW_FLAG_HAS_WAITERS));
-	else if (!new_release_ok)
-		pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_RELEASE_OK);
-	else if (dlist_is_empty(&wakeup))
-		pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
-	else if (new_release_ok)
-		pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
+	/* unset required flags, and release lock, in one fell swoop */
+	{
+		uint32		old_state;
+		uint32		desired_state;
+
+		old_state = pg_atomic_read_u32(&lock->state);
+		while (true)
+		{
+			desired_state = old_state;
+
+			/* compute desired flags */
+
+			if (new_release_ok)
+				desired_state |= LW_FLAG_RELEASE_OK;
+			else
+				desired_state &= ~LW_FLAG_RELEASE_OK;
 
-	/* We are done updating the shared state of the lock queue. */
-	SpinLockRelease(&lock->mutex);
+			if (dlist_is_empty(&wakeup))
+				desired_state &= ~LW_FLAG_HAS_WAITERS;
+
+			desired_state &= ~LW_FLAG_LOCKED;	/* release lock */
+
+			if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
+											   desired_state))
+				break;
+		}
+	}
 
 	/* Awaken any waiters I removed from the queue. */
 	dlist_foreach_modify(iter, &wakeup)
@@ -933,7 +1006,7 @@ LWLockWakeup(LWLock *lock)
 		 * that happens before the list unlink happens, the list would end up
 		 * being corrupted.
 		 *
-		 * The barrier pairs with the SpinLockAcquire() when enqueing for
+		 * The barrier pairs with the LWLockWaitListLock() when enqueing for
 		 * another lock.
 		 */
 		pg_write_barrier();
@@ -950,12 +1023,6 @@ LWLockWakeup(LWLock *lock)
 static void
 LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 {
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
-
 	/*
 	 * If we don't have a PGPROC structure, there's no way to wait. This
 	 * should never occur, since MyProc should only be null during shared
@@ -967,11 +1034,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 	if (MyProc->lwWaiting)
 		elog(PANIC, "queueing for lock while waiting on another one");
 
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 
 	/* setting the flag is protected by the spinlock */
 	pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
@@ -986,7 +1049,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 		dlist_push_tail(&lock->waiters, &MyProc->lwWaitLink);
 
 	/* Can release the mutex now */
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 #ifdef LOCK_DEBUG
 	pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
@@ -1015,11 +1078,7 @@ LWLockDequeueSelf(LWLock *lock)
 	lwstats->dequeue_self_count++;
 #endif
 
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 
 	/*
 	 * Can't just remove ourselves from the list, but we need to iterate over
@@ -1043,7 +1102,8 @@ LWLockDequeueSelf(LWLock *lock)
 		pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
 	}
 
-	SpinLockRelease(&lock->mutex);
+	/* XXX: combine with fetch_and above? */
+	LWLockWaitListUnlock(lock);
 
 	/* clear waiting state again, nice for debugging */
 	if (found)
@@ -1460,11 +1520,6 @@ LWLockConflictsWithVar(LWLock *lock,
 {
 	bool		mustwait;
 	uint64		value;
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
 
 	/*
 	 * Test first to see if it the slot is free right now.
@@ -1484,17 +1539,13 @@ LWLockConflictsWithVar(LWLock *lock,
 	*result = false;
 
 	/*
-	 * Read value using spinlock as we can't rely on atomic 64 bit
-	 * reads/stores.  TODO: On platforms with a way to do atomic 64 bit
-	 * reads/writes the spinlock could be optimized away.
+	 * Read value using the lwlock's wait list lock, as we can't generally
+	 * rely on atomic 64 bit reads/stores. TODO: On platforms with a way to
+	 * do atomic 64 bit reads/writes the spinlock should be optimized away.
 	 */
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 	value = *valptr;
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 	if (value != oldval)
 	{
@@ -1668,22 +1719,12 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
 {
 	dlist_head	wakeup;
 	dlist_mutable_iter iter;
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
 
 	PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
 
 	dlist_init(&wakeup);
 
-	/* Acquire mutex.  Time spent holding mutex should be short! */
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 
 	Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
 
@@ -1706,7 +1747,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
 	}
 
 	/* We are done updating shared state of the lock itself. */
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 	/*
 	 * Awaken any waiters I removed from the queue.
@@ -1804,21 +1845,15 @@ LWLockRelease(LWLock *lock)
 void
 LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
 {
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
+	LWLockWaitListLock(lock);
 
-	lwstats = get_lwlock_stats_entry(lock);
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
 	/*
 	 * Set the variable's value before releasing the lock, that prevents race
 	 * a race condition wherein a new locker acquires the lock, but hasn't yet
 	 * set the variables value.
	 */
 	*valptr = val;
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 	LWLockRelease(lock);
 }
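The wakeup path above shows the payoff of keeping the wait-list lock in the state word: LWLockWakeup() never calls LWLockWaitListUnlock(), but instead clears LW_FLAG_LOCKED and adjusts LW_FLAG_RELEASE_OK / LW_FLAG_HAS_WAITERS in a single compare-and-exchange loop. A condensed sketch of that combined update follows, building on the hypothetical demo_lwlock and LW_FLAG_LOCKED definitions from the sketch near the top; this is again C11 atomics, not PostgreSQL's actual API.

#include <stdbool.h>

#define LW_FLAG_HAS_WAITERS	((uint32_t) 1 << 30)
#define LW_FLAG_RELEASE_OK	((uint32_t) 1 << 29)

static void
demo_update_flags_and_unlock(demo_lwlock *lock,
							 bool release_ok, bool have_waiters)
{
	uint32_t	old_state = atomic_load(&lock->state);

	for (;;)
	{
		uint32_t	desired = old_state;

		if (release_ok)
			desired |= LW_FLAG_RELEASE_OK;
		else
			desired &= ~LW_FLAG_RELEASE_OK;
		if (!have_waiters)
			desired &= ~LW_FLAG_HAS_WAITERS;
		desired &= ~LW_FLAG_LOCKED;		/* drop the wait-list lock too */

		/* on failure, old_state is refreshed with the current value */
		if (atomic_compare_exchange_weak(&lock->state, &old_state, desired))
			break;
	}
}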

src/include/storage/lwlock.h

Lines changed: 1 addition & 5 deletions
@@ -57,15 +57,11 @@ typedef struct LWLockTranche
  */
 typedef struct LWLock
 {
-	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
 	uint16		tranche;		/* tranche ID */
-
 	pg_atomic_uint32 state;		/* state of exclusive/nonexclusive lockers */
-#ifdef LOCK_DEBUG
-	pg_atomic_uint32 nwaiters;	/* number of waiters */
-#endif
 	dlist_head	waiters;		/* list of waiting PGPROCs */
 #ifdef LOCK_DEBUG
+	pg_atomic_uint32 nwaiters;	/* number of waiters */
 	struct PGPROC *owner;		/* last exclusive owner of the lock */
 #endif
 } LWLock;
