Skip to content

Commit b4297c1

Browse files
author
Neil Conway
committed
This patch makes some improvements to the rtree index implementation:
(1) Keep a pin on the scan's current buffer and mark buffer. This avoids the need to do a ReadBuffer() for each tuple produced by the scan. Since ReadBuffer() is expensive, this is a significant win. (2) Convert a ReleaseBuffer(); ReadBuffer() pair into ReleaseAndReadBuffer(). Surely not a huge win, but it saves a lock acquire/release... (3) Remove a bunch of duplicated code in rtget.c; make rtnext() handle both the "initial result" and "subsequent result" cases. (4) Add support for index tuple killing. (5) Remove rtscancache(): it is dead code, for the same reason that gistscancache() is dead code (an index scan ought not be invoked with NoMovementScanDirection). The end result is about a 10% improvement in rtree index scan performance, according to contrib/rtree_gist/bench.
1 parent 1f5299b commit b4297c1

File tree

4 files changed

+157
-154
lines changed

4 files changed

+157
-154
lines changed

src/backend/access/rtree/rtget.c

Lines changed: 102 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.33 2004/12/31 21:59:26 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.34 2005/01/18 23:25:43 neilc Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -19,10 +19,8 @@
1919
#include "access/relscan.h"
2020
#include "access/rtree.h"
2121

22-
static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n,
22+
static OffsetNumber findnext(IndexScanDesc s, OffsetNumber n,
2323
ScanDirection dir);
24-
static bool rtscancache(IndexScanDesc s, ScanDirection dir);
25-
static bool rtfirst(IndexScanDesc s, ScanDirection dir);
2624
static bool rtnext(IndexScanDesc s, ScanDirection dir);
2725

2826

@@ -31,138 +29,106 @@ rtgettuple(PG_FUNCTION_ARGS)
3129
{
3230
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
3331
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
34-
bool res;
35-
36-
/* if we have it cached in the scan desc, just return the value */
37-
if (rtscancache(s, dir))
38-
PG_RETURN_BOOL(true);
39-
40-
/* not cached, so we'll have to do some work */
41-
if (ItemPointerIsValid(&(s->currentItemData)))
42-
res = rtnext(s, dir);
43-
else
44-
res = rtfirst(s, dir);
45-
PG_RETURN_BOOL(res);
46-
}
47-
48-
static bool
49-
rtfirst(IndexScanDesc s, ScanDirection dir)
50-
{
51-
Buffer b;
52-
Page p;
53-
OffsetNumber n;
54-
OffsetNumber maxoff;
55-
RTreePageOpaque po;
32+
Page page;
33+
OffsetNumber offnum;
5634
RTreeScanOpaque so;
57-
RTSTACK *stk;
58-
BlockNumber blk;
59-
IndexTuple it;
6035

61-
b = ReadBuffer(s->indexRelation, P_ROOT);
62-
p = BufferGetPage(b);
63-
po = (RTreePageOpaque) PageGetSpecialPointer(p);
6436
so = (RTreeScanOpaque) s->opaque;
6537

66-
for (;;)
38+
/*
39+
* If we've already produced a tuple and the executor has informed
40+
* us that it should be marked "killed", do so now.
41+
*/
42+
if (s->kill_prior_tuple && ItemPointerIsValid(&(s->currentItemData)))
6743
{
68-
maxoff = PageGetMaxOffsetNumber(p);
69-
if (ScanDirectionIsBackward(dir))
70-
n = findnext(s, p, maxoff, dir);
71-
else
72-
n = findnext(s, p, FirstOffsetNumber, dir);
73-
74-
while (n < FirstOffsetNumber || n > maxoff)
75-
{
76-
ReleaseBuffer(b);
77-
if (so->s_stack == NULL)
78-
return false;
79-
80-
stk = so->s_stack;
81-
b = ReadBuffer(s->indexRelation, stk->rts_blk);
82-
p = BufferGetPage(b);
83-
po = (RTreePageOpaque) PageGetSpecialPointer(p);
84-
maxoff = PageGetMaxOffsetNumber(p);
44+
offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
45+
page = BufferGetPage(so->curbuf);
46+
PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;
47+
SetBufferCommitInfoNeedsSave(so->curbuf);
48+
}
8549

86-
if (ScanDirectionIsBackward(dir))
87-
n = OffsetNumberPrev(stk->rts_child);
88-
else
89-
n = OffsetNumberNext(stk->rts_child);
90-
so->s_stack = stk->rts_parent;
91-
pfree(stk);
50+
/*
51+
* Get the next tuple that matches the search key; if asked to
52+
* skip killed tuples, find the first non-killed tuple that
53+
* matches. Return as soon as we've run out of matches or we've
54+
* found an acceptable match.
55+
*/
56+
for (;;)
57+
{
58+
bool res = rtnext(s, dir);
9259

93-
n = findnext(s, p, n, dir);
94-
}
95-
if (po->flags & F_LEAF)
60+
if (res == true && s->ignore_killed_tuples)
9661
{
97-
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
98-
99-
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
100-
101-
s->xs_ctup.t_self = it->t_tid;
102-
103-
ReleaseBuffer(b);
104-
return true;
62+
offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
63+
page = BufferGetPage(so->curbuf);
64+
if (ItemIdDeleted(PageGetItemId(page, offnum)))
65+
continue;
10566
}
106-
else
107-
{
108-
stk = (RTSTACK *) palloc(sizeof(RTSTACK));
109-
stk->rts_child = n;
110-
stk->rts_blk = BufferGetBlockNumber(b);
111-
stk->rts_parent = so->s_stack;
112-
so->s_stack = stk;
113-
114-
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
115-
blk = ItemPointerGetBlockNumber(&(it->t_tid));
11667

117-
ReleaseBuffer(b);
118-
b = ReadBuffer(s->indexRelation, blk);
119-
p = BufferGetPage(b);
120-
po = (RTreePageOpaque) PageGetSpecialPointer(p);
121-
}
68+
PG_RETURN_BOOL(res);
12269
}
12370
}
12471

12572
static bool
12673
rtnext(IndexScanDesc s, ScanDirection dir)
12774
{
128-
Buffer b;
12975
Page p;
13076
OffsetNumber n;
131-
OffsetNumber maxoff;
13277
RTreePageOpaque po;
13378
RTreeScanOpaque so;
134-
RTSTACK *stk;
135-
BlockNumber blk;
136-
IndexTuple it;
13779

138-
blk = ItemPointerGetBlockNumber(&(s->currentItemData));
139-
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
80+
so = (RTreeScanOpaque) s->opaque;
14081

141-
if (ScanDirectionIsForward(dir))
142-
n = OffsetNumberNext(n);
143-
else
144-
n = OffsetNumberPrev(n);
82+
if (!ItemPointerIsValid(&(s->currentItemData)))
83+
{
84+
/* first call: start at the root */
85+
Assert(BufferIsValid(so->curbuf) == false);
86+
so->curbuf = ReadBuffer(s->indexRelation, P_ROOT);
87+
}
14588

146-
b = ReadBuffer(s->indexRelation, blk);
147-
p = BufferGetPage(b);
89+
p = BufferGetPage(so->curbuf);
14890
po = (RTreePageOpaque) PageGetSpecialPointer(p);
149-
so = (RTreeScanOpaque) s->opaque;
91+
92+
if (!ItemPointerIsValid(&(s->currentItemData)))
93+
{
94+
/* first call: start at first/last offset */
95+
if (ScanDirectionIsForward(dir))
96+
n = FirstOffsetNumber;
97+
else
98+
n = PageGetMaxOffsetNumber(p);
99+
}
100+
else
101+
{
102+
/* go on to the next offset */
103+
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
104+
if (ScanDirectionIsForward(dir))
105+
n = OffsetNumberNext(n);
106+
else
107+
n = OffsetNumberPrev(n);
108+
}
150109

151110
for (;;)
152111
{
153-
maxoff = PageGetMaxOffsetNumber(p);
154-
n = findnext(s, p, n, dir);
112+
IndexTuple it;
113+
RTSTACK *stk;
114+
115+
n = findnext(s, n, dir);
155116

156-
while (n < FirstOffsetNumber || n > maxoff)
117+
/* no match on this page, so read in the next stack entry */
118+
if (n == InvalidOffsetNumber)
157119
{
158-
ReleaseBuffer(b);
120+
/* if out of stack entries, we're done */
159121
if (so->s_stack == NULL)
122+
{
123+
ReleaseBuffer(so->curbuf);
124+
so->curbuf = InvalidBuffer;
160125
return false;
126+
}
161127

162128
stk = so->s_stack;
163-
b = ReadBuffer(s->indexRelation, stk->rts_blk);
164-
p = BufferGetPage(b);
165-
maxoff = PageGetMaxOffsetNumber(p);
129+
so->curbuf = ReleaseAndReadBuffer(so->curbuf, s->indexRelation,
130+
stk->rts_blk);
131+
p = BufferGetPage(so->curbuf);
166132
po = (RTreePageOpaque) PageGetSpecialPointer(p);
167133

168134
if (ScanDirectionIsBackward(dir))
@@ -172,33 +138,41 @@ rtnext(IndexScanDesc s, ScanDirection dir)
172138
so->s_stack = stk->rts_parent;
173139
pfree(stk);
174140

175-
n = findnext(s, p, n, dir);
141+
continue;
176142
}
143+
177144
if (po->flags & F_LEAF)
178145
{
179-
ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n);
180-
146+
ItemPointerSet(&(s->currentItemData),
147+
BufferGetBlockNumber(so->curbuf),
148+
n);
181149
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
182-
183150
s->xs_ctup.t_self = it->t_tid;
184-
185-
ReleaseBuffer(b);
186151
return true;
187152
}
188153
else
189154
{
155+
BlockNumber blk;
156+
190157
stk = (RTSTACK *) palloc(sizeof(RTSTACK));
191158
stk->rts_child = n;
192-
stk->rts_blk = BufferGetBlockNumber(b);
159+
stk->rts_blk = BufferGetBlockNumber(so->curbuf);
193160
stk->rts_parent = so->s_stack;
194161
so->s_stack = stk;
195162

196163
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
197164
blk = ItemPointerGetBlockNumber(&(it->t_tid));
198165

199-
ReleaseBuffer(b);
200-
b = ReadBuffer(s->indexRelation, blk);
201-
p = BufferGetPage(b);
166+
/*
167+
* Note that we release the pin on the page as we descend
168+
* down the tree, even though there's a good chance we'll
169+
* eventually need to re-read the buffer later in this
170+
* scan. This may or may not be optimal, but it doesn't
171+
* seem likely to make a huge performance difference
172+
* either way.
173+
*/
174+
so->curbuf = ReleaseAndReadBuffer(so->curbuf, s->indexRelation, blk);
175+
p = BufferGetPage(so->curbuf);
202176
po = (RTreePageOpaque) PageGetSpecialPointer(p);
203177

204178
if (ScanDirectionIsBackward(dir))
@@ -209,17 +183,26 @@ rtnext(IndexScanDesc s, ScanDirection dir)
209183
}
210184
}
211185

186+
/*
187+
* Return the offset of the next matching index entry. We begin the
188+
* search at offset "n" and search for matches in the direction
189+
* "dir". If no more matching entries are found on the page,
190+
* InvalidOffsetNumber is returned.
191+
*/
212192
static OffsetNumber
213-
findnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
193+
findnext(IndexScanDesc s, OffsetNumber n, ScanDirection dir)
214194
{
215195
OffsetNumber maxoff;
216196
IndexTuple it;
217197
RTreePageOpaque po;
218198
RTreeScanOpaque so;
199+
Page p;
200+
201+
so = (RTreeScanOpaque) s->opaque;
202+
p = BufferGetPage(so->curbuf);
219203

220204
maxoff = PageGetMaxOffsetNumber(p);
221205
po = (RTreePageOpaque) PageGetSpecialPointer(p);
222-
so = (RTreeScanOpaque) s->opaque;
223206

224207
/*
225208
* If we modified the index during the scan, we may have a pointer to
@@ -256,28 +239,8 @@ findnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir)
256239
n = OffsetNumberNext(n);
257240
}
258241

259-
return n;
260-
}
261-
262-
static bool
263-
rtscancache(IndexScanDesc s, ScanDirection dir)
264-
{
265-
Buffer b;
266-
Page p;
267-
OffsetNumber n;
268-
IndexTuple it;
269-
270-
if (!(ScanDirectionIsNoMovement(dir)
271-
&& ItemPointerIsValid(&(s->currentItemData))))
272-
return false;
273-
274-
b = ReadBuffer(s->indexRelation,
275-
ItemPointerGetBlockNumber(&(s->currentItemData)));
276-
p = BufferGetPage(b);
277-
n = ItemPointerGetOffsetNumber(&(s->currentItemData));
278-
it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
279-
s->xs_ctup.t_self = it->t_tid;
280-
ReleaseBuffer(b);
281-
282-
return true;
242+
if (n >= FirstOffsetNumber && n <= maxoff)
243+
return n; /* found a match on this page */
244+
else
245+
return InvalidOffsetNumber; /* no match, go to next page */
283246
}

src/backend/access/rtree/rtree.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/rtree/rtree.c,v 1.85 2004/12/31 21:59:26 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/rtree/rtree.c,v 1.86 2005/01/18 23:25:47 neilc Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -280,12 +280,8 @@ rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate)
280280

281281
do
282282
{
283-
/* let go of current buffer before getting next */
284-
if (buffer != InvalidBuffer)
285-
ReleaseBuffer(buffer);
286-
287-
/* get next buffer */
288-
buffer = ReadBuffer(r, blk);
283+
/* release the current buffer, read in the next one */
284+
buffer = ReleaseAndReadBuffer(buffer, r, blk);
289285
page = (Page) BufferGetPage(buffer);
290286

291287
opaque = (RTreePageOpaque) PageGetSpecialPointer(page);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy