Skip to content

Commit a615acf

Browse files
committed
Arrange for read-only accesses to SLRU page buffers to take only a shared
lock, not exclusive, if the desired page is already in memory. This can be demonstrated to be a significant win on the pg_subtrans cache when there is a large window of open transactions. It should be useful for pg_clog as well. I didn't try to make GetMultiXactIdMembers() use the code, as that would have taken some restructuring, and what with the local cache for multixact contents it probably wouldn't really make a difference. Per my recent proposal.
1 parent 953208a commit a615acf

File tree

5 files changed

+145
-28
lines changed

5 files changed

+145
-28
lines changed

src/backend/access/transam/clog.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
2525
* Portions Copyright (c) 1994, Regents of the University of California
2626
*
27-
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.34 2005/11/05 21:19:47 tgl Exp $
27+
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.35 2005/12/06 18:10:06 tgl Exp $
2828
*
2929
*-------------------------------------------------------------------------
3030
*/
@@ -128,9 +128,9 @@ TransactionIdGetStatus(TransactionId xid)
128128
char *byteptr;
129129
XidStatus status;
130130

131-
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
131+
/* lock is acquired by SimpleLruReadPage_ReadOnly */
132132

133-
slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
133+
slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid);
134134
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
135135

136136
status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;

src/backend/access/transam/multixact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
4343
* Portions Copyright (c) 1994, Regents of the University of California
4444
*
45-
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.13 2005/11/22 18:17:07 momjian Exp $
45+
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.14 2005/12/06 18:10:06 tgl Exp $
4646
*
4747
*-------------------------------------------------------------------------
4848
*/
@@ -1690,12 +1690,12 @@ TruncateMultiXact(void)
16901690
int entryno;
16911691
MultiXactOffset *offptr;
16921692

1693-
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
1693+
/* lock is acquired by SimpleLruReadPage_ReadOnly */
16941694

16951695
pageno = MultiXactIdToOffsetPage(oldestMXact);
16961696
entryno = MultiXactIdToOffsetEntry(oldestMXact);
16971697

1698-
slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, oldestMXact);
1698+
slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, oldestMXact);
16991699
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
17001700
offptr += entryno;
17011701
oldestOffset = *offptr;

src/backend/access/transam/slru.c

Lines changed: 120 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
* reading in or writing out a page buffer does not hold the control lock,
2020
* only the per-buffer lock for the buffer it is working on.
2121
*
22+
* "Holding the control lock" means exclusive lock in all cases except for
23+
* SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
24+
* the implications of that.
25+
*
2226
* When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
2327
* before releasing the control lock. The per-buffer lock is released after
2428
* completing the I/O, re-acquiring the control lock, and updating the shared
@@ -37,7 +41,7 @@
3741
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
3842
* Portions Copyright (c) 1994, Regents of the University of California
3943
*
40-
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.31 2005/11/22 18:17:07 momjian Exp $
44+
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.32 2005/12/06 18:10:06 tgl Exp $
4145
*
4246
*-------------------------------------------------------------------------
4347
*/
@@ -91,15 +95,30 @@ typedef struct SlruFlushData
9195
} SlruFlushData;
9296

9397
/*
94-
* Macro to mark a buffer slot "most recently used".
98+
* Macro to mark a buffer slot "most recently used". Note multiple evaluation
99+
* of arguments!
100+
*
101+
* The reason for the if-test is that there are often many consecutive
102+
* accesses to the same page (particularly the latest page). By suppressing
103+
* useless increments of cur_lru_count, we reduce the probability that old
104+
* pages' counts will "wrap around" and make them appear recently used.
105+
*
106+
* We allow this code to be executed concurrently by multiple processes within
107+
* SimpleLruReadPage_ReadOnly(). As long as int reads and writes are atomic,
108+
* this should not cause any completely-bogus values to enter the computation.
109+
* However, it is possible for either cur_lru_count or individual
110+
* page_lru_count entries to be "reset" to lower values than they should have,
111+
* in case a process is delayed while it executes this macro. With care in
112+
* SlruSelectLRUPage(), this does little harm, and in any case the absolute
113+
* worst possible consequence is a nonoptimal choice of page to evict. The
114+
* gain from allowing concurrent reads of SLRU pages seems worth it.
95115
*/
96116
#define SlruRecentlyUsed(shared, slotno) \
97117
do { \
98-
if ((shared)->page_lru_count[slotno] != 0) { \
99-
int iilru; \
100-
for (iilru = 0; iilru < NUM_SLRU_BUFFERS; iilru++) \
101-
(shared)->page_lru_count[iilru]++; \
102-
(shared)->page_lru_count[slotno] = 0; \
118+
int new_lru_count = (shared)->cur_lru_count; \
119+
if (new_lru_count != (shared)->page_lru_count[slotno]) { \
120+
(shared)->cur_lru_count = ++new_lru_count; \
121+
(shared)->page_lru_count[slotno] = new_lru_count; \
103122
} \
104123
} while (0)
105124

@@ -165,7 +184,7 @@ SimpleLruInit(SlruCtl ctl, const char *name,
165184
shared->page_buffer[slotno] = bufptr;
166185
shared->page_status[slotno] = SLRU_PAGE_EMPTY;
167186
shared->page_dirty[slotno] = false;
168-
shared->page_lru_count[slotno] = 1;
187+
shared->page_lru_count[slotno] = 0;
169188
shared->buffer_locks[slotno] = LWLockAssign();
170189
bufptr += BLCKSZ;
171190
}
@@ -351,6 +370,49 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid)
351370
}
352371
}
353372

373+
/*
374+
* Find a page in a shared buffer, reading it in if necessary.
375+
* The page number must correspond to an already-initialized page.
376+
* The caller must intend only read-only access to the page.
377+
*
378+
* The passed-in xid is used only for error reporting, and may be
379+
* InvalidTransactionId if no specific xid is associated with the action.
380+
*
381+
* Return value is the shared-buffer slot number now holding the page.
382+
* The buffer's LRU access info is updated.
383+
*
384+
* Control lock must NOT be held at entry, but will be held at exit.
385+
* It is unspecified whether the lock will be shared or exclusive.
386+
*/
387+
int
388+
SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
389+
{
390+
SlruShared shared = ctl->shared;
391+
int slotno;
392+
393+
/* Try to find the page while holding only shared lock */
394+
LWLockAcquire(shared->ControlLock, LW_SHARED);
395+
396+
/* See if page is already in a buffer */
397+
for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
398+
{
399+
if (shared->page_number[slotno] == pageno &&
400+
shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
401+
shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
402+
{
403+
/* See comments for SlruRecentlyUsed macro */
404+
SlruRecentlyUsed(shared, slotno);
405+
return slotno;
406+
}
407+
}
408+
409+
/* No luck, so switch to normal exclusive lock and do regular read */
410+
LWLockRelease(shared->ControlLock);
411+
LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
412+
413+
return SimpleLruReadPage(ctl, pageno, xid);
414+
}
415+
354416
/*
355417
* Write a page from a shared buffer, if necessary.
356418
* Does nothing if the specified slot is not dirty.
@@ -729,8 +791,10 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
729791
for (;;)
730792
{
731793
int slotno;
732-
int bestslot = 0;
733-
unsigned int bestcount = 0;
794+
int cur_count;
795+
int bestslot;
796+
int best_delta;
797+
int best_page_number;
734798

735799
/* See if page already has a buffer assigned */
736800
for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
@@ -742,17 +806,59 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
742806

743807
/*
744808
* If we find any EMPTY slot, just select that one. Else locate the
745-
* least-recently-used slot that isn't the latest page.
809+
* least-recently-used slot to replace.
810+
*
811+
* Normally the page_lru_count values will all be different and so
812+
* there will be a well-defined LRU page. But since we allow
813+
* concurrent execution of SlruRecentlyUsed() within
814+
* SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
815+
* acquire the same lru_count values. In that case we break ties by
816+
* choosing the furthest-back page.
817+
*
818+
* In no case will we select the slot containing latest_page_number
819+
* for replacement, even if it appears least recently used.
820+
*
821+
* Notice that this next line forcibly advances cur_lru_count to a
822+
* value that is certainly beyond any value that will be in the
823+
* page_lru_count array after the loop finishes. This ensures that
824+
* the next execution of SlruRecentlyUsed will mark the page newly
825+
* used, even if it's for a page that has the current counter value.
826+
* That gets us back on the path to having good data when there are
827+
* multiple pages with the same lru_count.
746828
*/
829+
cur_count = (shared->cur_lru_count)++;
830+
best_delta = -1;
831+
bestslot = 0; /* no-op, just keeps compiler quiet */
832+
best_page_number = 0; /* ditto */
747833
for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
748834
{
835+
int this_delta;
836+
int this_page_number;
837+
749838
if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
750839
return slotno;
751-
if (shared->page_lru_count[slotno] > bestcount &&
752-
shared->page_number[slotno] != shared->latest_page_number)
840+
this_delta = cur_count - shared->page_lru_count[slotno];
841+
if (this_delta < 0)
842+
{
843+
/*
844+
* Clean up in case shared updates have caused cur_count
845+
* increments to get "lost". We back off the page counts,
846+
* rather than trying to increase cur_count, to avoid any
847+
* question of infinite loops or failure in the presence of
848+
* wrapped-around counts.
849+
*/
850+
shared->page_lru_count[slotno] = cur_count;
851+
this_delta = 0;
852+
}
853+
this_page_number = shared->page_number[slotno];
854+
if ((this_delta > best_delta ||
855+
(this_delta == best_delta &&
856+
ctl->PagePrecedes(this_page_number, best_page_number))) &&
857+
this_page_number != shared->latest_page_number)
753858
{
754859
bestslot = slotno;
755-
bestcount = shared->page_lru_count[slotno];
860+
best_delta = this_delta;
861+
best_page_number = this_page_number;
756862
}
757863
}
758864

src/backend/access/transam/subtrans.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
2323
* Portions Copyright (c) 1994, Regents of the University of California
2424
*
25-
* $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.13 2005/11/22 18:17:07 momjian Exp $
25+
* $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.14 2005/12/06 18:10:06 tgl Exp $
2626
*
2727
*-------------------------------------------------------------------------
2828
*/
@@ -110,9 +110,9 @@ SubTransGetParent(TransactionId xid)
110110
if (!TransactionIdIsNormal(xid))
111111
return InvalidTransactionId;
112112

113-
LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
113+
/* lock is acquired by SimpleLruReadPage_ReadOnly */
114114

115-
slotno = SimpleLruReadPage(SubTransCtl, pageno, xid);
115+
slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
116116
ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
117117
ptr += entryno;
118118

src/include/access/slru.h

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/access/slru.h,v 1.15 2005/11/05 21:19:47 tgl Exp $
9+
* $PostgreSQL: pgsql/src/include/access/slru.h,v 1.16 2005/12/06 18:10:06 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -46,17 +46,26 @@ typedef struct SlruSharedData
4646

4747
/*
4848
* Info for each buffer slot. Page number is undefined when status is
49-
* EMPTY. lru_count is essentially the number of page switches since last
50-
* use of this page; the page with highest lru_count is the best candidate
51-
* to replace.
49+
* EMPTY.
5250
*/
5351
char *page_buffer[NUM_SLRU_BUFFERS];
5452
SlruPageStatus page_status[NUM_SLRU_BUFFERS];
5553
bool page_dirty[NUM_SLRU_BUFFERS];
5654
int page_number[NUM_SLRU_BUFFERS];
57-
unsigned int page_lru_count[NUM_SLRU_BUFFERS];
55+
int page_lru_count[NUM_SLRU_BUFFERS];
5856
LWLockId buffer_locks[NUM_SLRU_BUFFERS];
5957

58+
/*----------
59+
* We mark a page "most recently used" by setting
60+
* page_lru_count[slotno] = ++cur_lru_count;
61+
* The oldest page is therefore the one with the highest value of
62+
* cur_lru_count - page_lru_count[slotno]
63+
* The counts will eventually wrap around, but this calculation still
64+
* works as long as no page's age exceeds INT_MAX counts.
65+
*----------
66+
*/
67+
int cur_lru_count;
68+
6069
/*
6170
* latest_page_number is the page number of the current end of the log;
6271
* this is not critical data, since we use it only to avoid swapping out
@@ -106,6 +115,8 @@ extern void SimpleLruInit(SlruCtl ctl, const char *name,
106115
LWLockId ctllock, const char *subdir);
107116
extern int SimpleLruZeroPage(SlruCtl ctl, int pageno);
108117
extern int SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid);
118+
extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
119+
TransactionId xid);
109120
extern void SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata);
110121
extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint);
111122
extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy