Skip to content

Commit dab421d

Browse files
committed
Seems I was too optimistic in supposing that sinval's maxMsgNum could be
read and written without a lock. The value itself is atomic, sure, but on processors with weak memory ordering it's possible for a reader to see the value change before it sees the associated message written into the buffer array. Fix by introducing a spinlock that's used just to read and write maxMsgNum. (We could do this with less overhead if we recognized a concept of "memory access barrier"; is it worth introducing such a thing? At the moment probably not --- I can't measure any clear slowdown from adding the spinlock, so this solution is probably fine.) Per buildfarm results.
1 parent fad153e commit dab421d

File tree

1 file changed

+48
-15
lines changed

1 file changed

+48
-15
lines changed

src/backend/storage/ipc/sinvaladt.c

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.71 2008/06/19 21:32:56 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.72 2008/06/20 00:24:53 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -23,6 +23,7 @@
2323
#include "storage/proc.h"
2424
#include "storage/shmem.h"
2525
#include "storage/sinvaladt.h"
26+
#include "storage/spin.h"
2627

2728

2829
/*
@@ -84,10 +85,20 @@
8485
* can operate in parallel with one or more readers, because the writer
8586
* has no need to touch anyone's ProcState, except in the infrequent cases
8687
* when SICleanupQueue is needed. The only point of overlap is that
87-
* the writer might change maxMsgNum while readers are looking at it.
88-
* This should be okay: we are assuming that fetching or storing an int
89-
* is atomic, an assumption also made elsewhere in Postgres. However
90-
* readers mustn't assume that maxMsgNum isn't changing under them.
88+
* the writer wants to change maxMsgNum while readers need to read it.
89+
* We deal with that by having a spinlock that readers must take for just
90+
* long enough to read maxMsgNum, while writers take it for just long enough
91+
* to write maxMsgNum. (The exact rule is that you need the spinlock to
92+
* read maxMsgNum if you are not holding SInvalWriteLock, and you need the
93+
* spinlock to write maxMsgNum unless you are holding both locks.)
94+
*
95+
* Note: since maxMsgNum is an int and hence presumably atomically readable/
96+
* writable, the spinlock might seem unnecessary. The reason it is needed
97+
* is to provide a memory barrier: we need to be sure that messages written
98+
* to the array are actually there before maxMsgNum is increased, and that
99+
* readers will see that data after fetching maxMsgNum. Multiprocessors
100+
* that have weak memory-ordering guarantees can fail without the memory
101+
* barrier instructions that are included in the spinlock sequences.
91102
*/
92103

93104

@@ -153,6 +164,8 @@ typedef struct SISeg
153164
int lastBackend; /* index of last active procState entry, +1 */
154165
int maxBackends; /* size of procState array */
155166

167+
slock_t msgnumLock; /* spinlock protecting maxMsgNum */
168+
156169
/*
157170
* Circular buffer holding shared-inval messages
158171
*/
@@ -209,12 +222,13 @@ CreateSharedInvalidationState(void)
209222
if (found)
210223
return;
211224

212-
/* Clear message counters, save size of procState array */
225+
/* Clear message counters, save size of procState array, init spinlock */
213226
shmInvalBuffer->minMsgNum = 0;
214227
shmInvalBuffer->maxMsgNum = 0;
215228
shmInvalBuffer->nextThreshold = CLEANUP_MIN;
216229
shmInvalBuffer->lastBackend = 0;
217230
shmInvalBuffer->maxBackends = MaxBackends;
231+
SpinLockInit(&shmInvalBuffer->msgnumLock);
218232

219233
/* The buffer[] array is initially all unused, so we need not fill it */
220234

@@ -365,6 +379,7 @@ SIInsertDataEntries(const SharedInvalidationMessage *data, int n)
365379
{
366380
int nthistime = Min(n, WRITE_QUANTUM);
367381
int numMsgs;
382+
int max;
368383

369384
n -= nthistime;
370385

@@ -388,10 +403,21 @@ SIInsertDataEntries(const SharedInvalidationMessage *data, int n)
388403
/*
389404
* Insert new message(s) into proper slot of circular buffer
390405
*/
406+
max = segP->maxMsgNum;
391407
while (nthistime-- > 0)
392408
{
393-
segP->buffer[segP->maxMsgNum % MAXNUMMESSAGES] = *data++;
394-
segP->maxMsgNum++;
409+
segP->buffer[max % MAXNUMMESSAGES] = *data++;
410+
max++;
411+
}
412+
413+
/* Update current value of maxMsgNum using spinlock */
414+
{
415+
/* use volatile pointer to prevent code rearrangement */
416+
volatile SISeg *vsegP = segP;
417+
418+
SpinLockAcquire(&vsegP->msgnumLock);
419+
vsegP->maxMsgNum = max;
420+
SpinLockRelease(&vsegP->msgnumLock);
395421
}
396422

397423
LWLockRelease(SInvalWriteLock);
@@ -431,21 +457,32 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
431457
{
432458
SISeg *segP;
433459
ProcState *stateP;
460+
int max;
434461
int n;
435462

436463
LWLockAcquire(SInvalReadLock, LW_SHARED);
437464

438465
segP = shmInvalBuffer;
439466
stateP = &segP->procState[MyBackendId - 1];
440467

468+
/* Fetch current value of maxMsgNum using spinlock */
469+
{
470+
/* use volatile pointer to prevent code rearrangement */
471+
volatile SISeg *vsegP = segP;
472+
473+
SpinLockAcquire(&vsegP->msgnumLock);
474+
max = vsegP->maxMsgNum;
475+
SpinLockRelease(&vsegP->msgnumLock);
476+
}
477+
441478
if (stateP->resetState)
442479
{
443480
/*
444481
* Force reset. We can say we have dealt with any messages added
445482
* since the reset, as well; and that means we should clear the
446483
* signaled flag, too.
447484
*/
448-
stateP->nextMsgNum = segP->maxMsgNum;
485+
stateP->nextMsgNum = max;
449486
stateP->resetState = false;
450487
stateP->signaled = false;
451488
LWLockRelease(SInvalReadLock);
@@ -459,13 +496,9 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
459496
* There may be other backends that haven't read the message(s), so we
460497
* cannot delete them here. SICleanupQueue() will eventually remove them
461498
* from the queue.
462-
*
463-
* Note: depending on the compiler, we might read maxMsgNum only once
464-
* here, or each time through the loop. It doesn't matter (as long as
465-
* each fetch is atomic).
466499
*/
467500
n = 0;
468-
while (n < datasize && stateP->nextMsgNum < segP->maxMsgNum)
501+
while (n < datasize && stateP->nextMsgNum < max)
469502
{
470503
data[n++] = segP->buffer[stateP->nextMsgNum % MAXNUMMESSAGES];
471504
stateP->nextMsgNum++;
@@ -474,7 +507,7 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
474507
/*
475508
* Reset our "signaled" flag whenever we have caught up completely.
476509
*/
477-
if (stateP->nextMsgNum >= segP->maxMsgNum)
510+
if (stateP->nextMsgNum >= max)
478511
stateP->signaled = false;
479512

480513
LWLockRelease(SInvalReadLock);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy