Skip to content

Commit d5f7d2c

Browse files
committed
Adopt a random backoff algorithm for sleep delays when waiting for a
spinlock. Per recent pghackers discussion.
1 parent a667288 commit d5f7d2c

File tree

1 file changed

+53
-18
lines changed

1 file changed

+53
-18
lines changed

src/backend/storage/lmgr/s_lock.c

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.14 2003/08/04 15:28:33 tgl Exp $
12+
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/s_lock.c,v 1.15 2003/08/06 16:43:43 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -31,7 +31,7 @@ s_lock_stuck(volatile slock_t *lock, const char *file, int line)
3131
fprintf(stderr,
3232
"\nStuck spinlock (%p) detected at %s:%d.\n",
3333
lock, file, line);
34-
abort();
34+
exit(1);
3535
#else
3636
elog(PANIC, "stuck spinlock (%p) detected at %s:%d",
3737
lock, file, line);
@@ -45,35 +45,68 @@ s_lock_stuck(volatile slock_t *lock, const char *file, int line)
4545
void
4646
s_lock(volatile slock_t *lock, const char *file, int line)
4747
{
48-
unsigned spins = 0;
49-
unsigned delays = 0;
50-
struct timeval delay;
51-
5248
/*
5349
* We loop tightly for awhile, then delay using select() and try
5450
* again. Preferably, "awhile" should be a small multiple of the
5551
* maximum time we expect a spinlock to be held. 100 iterations seems
56-
* about right.
52+
* about right. In most multi-CPU scenarios, the spinlock is probably
53+
* held by a process on another CPU and will be released before we
54+
* finish 100 iterations. However, on a uniprocessor, the tight loop
55+
* is just a waste of cycles, so don't iterate thousands of times.
56+
*
57+
* Once we do decide to block, we use randomly increasing select() delays.
58+
* The first delay is 10 msec, then the delay randomly increases to about
59+
* one second, after which we reset to 10 msec and start again. The idea
60+
* here is that in the presence of heavy contention we need to increase
61+
* the delay, else the spinlock holder may never get to run and release
62+
* the lock. (Consider situation where spinlock holder has been nice'd
63+
* down in priority by the scheduler --- it will not get scheduled until
64+
* all would-be acquirers are sleeping, so if we always use a 10-msec
65+
* sleep, there is a real possibility of starvation.) But we can't just
66+
* clamp the delay to an upper bound, else it would take a long time to
67+
* make a reasonable number of tries.
5768
*
58-
* We use a 10 millisec select delay because that is the lower limit on
59-
* many platforms. The timeout is figured on this delay only, and so
60-
* the nominal 1 minute is a lower bound.
69+
* We time out and declare error after NUM_DELAYS delays (thus, exactly
70+
* that many tries). With the given settings, this will usually take
71+
* 3 or so minutes. It seems better to fix the total number of tries (and
72+
* thus the probability of unintended failure) than to fix the total time
73+
* spent.
74+
*
75+
* The select() delays are measured in centiseconds (0.01 sec) because
76+
* 10 msec is a common resolution limit at the OS level.
6177
*/
6278
#define SPINS_PER_DELAY 100
63-
#define DELAY_MSEC 10
64-
#define TIMEOUT_MSEC (60 * 1000)
79+
#define NUM_DELAYS 1000
80+
#define MIN_DELAY_CSEC 1
81+
#define MAX_DELAY_CSEC 100
82+
83+
int spins = 0;
84+
int delays = 0;
85+
int cur_delay = MIN_DELAY_CSEC;
86+
struct timeval delay;
6587

6688
while (TAS(lock))
6789
{
6890
if (++spins > SPINS_PER_DELAY)
6991
{
70-
if (++delays > (TIMEOUT_MSEC / DELAY_MSEC))
92+
if (++delays > NUM_DELAYS)
7193
s_lock_stuck(lock, file, line);
7294

73-
delay.tv_sec = 0;
74-
delay.tv_usec = DELAY_MSEC * 1000;
95+
delay.tv_sec = cur_delay / 100;
96+
delay.tv_usec = (cur_delay % 100) * 10000;
7597
(void) select(0, NULL, NULL, NULL, &delay);
7698

99+
#if defined(S_LOCK_TEST)
100+
fprintf(stdout, "*"); fflush(stdout);
101+
#endif
102+
103+
/* increase delay by a random fraction of itself, so the new delay is between 1X and 2X the old one */
104+
cur_delay += (int) (cur_delay *
105+
(((double) random()) / ((double) MAX_RANDOM_VALUE)) + 0.5);
106+
/* wrap back to minimum delay when max is exceeded */
107+
if (cur_delay > MAX_DELAY_CSEC)
108+
cur_delay = MIN_DELAY_CSEC;
109+
77110
spins = 0;
78111
}
79112
}
@@ -217,6 +250,8 @@ volatile slock_t test_lock;
217250
int
218251
main()
219252
{
253+
srandom((unsigned int) time(NULL));
254+
220255
S_INIT_LOCK(&test_lock);
221256

222257
if (!S_LOCK_FREE(&test_lock))
@@ -249,9 +284,9 @@ main()
249284
return 1;
250285
}
251286

252-
printf("S_LOCK_TEST: this will hang for a minute or so and then abort\n");
253-
printf(" with a 'stuck spinlock' message if S_LOCK()\n");
254-
printf(" and TAS() are working.\n");
287+
printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
288+
printf(" exit with a 'stuck spinlock' message\n");
289+
printf(" if S_LOCK() and TAS() are working.\n");
255290
fflush(stdout);
256291

257292
s_lock(&test_lock, __FILE__, __LINE__);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy