Skip to content

Commit d2158b0

Browse files
committed
* Add support for NULLs to GiST.
* Some refactoring and simplification of the code in gistutil.c and gist.c. * In some cases the user-defined picksplit method can now be called for a non-first column of the index, but there is still room to do more here. * Small fix of the docs related to NULL support.
1 parent 8672205 commit d2158b0

File tree

8 files changed

+375
-430
lines changed

8 files changed

+375
-430
lines changed

doc/src/sgml/indexam.sgml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.11 2006/05/10 23:18:38 tgl Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.12 2006/05/24 11:01:39 teodor Exp $ -->
22

33
<chapter id="indexam">
44
<title>Index Access Method Interface Definition</title>
@@ -126,8 +126,7 @@
126126
used to scan for rows with <literal>a = 4</literal>, which is wrong if the
127127
index omits rows where <literal>b</> is null.
128128
It is, however, OK to omit rows where the first indexed column is null.
129-
(GiST currently does so.) Thus,
130-
<structfield>amindexnulls</structfield> should be set true only if the
129+
Thus, <structfield>amindexnulls</structfield> should be set true only if the
131130
index access method indexes all rows, including arbitrary combinations of
132131
null values.
133132
</para>

doc/src/sgml/indices.sgml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.56 2006/01/18 21:29:45 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.57 2006/05/24 11:01:39 teodor Exp $ -->
22

33
<chapter id="indexes">
44
<title id="indexes-title">Indexes</title>
@@ -290,13 +290,13 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
290290
</para>
291291

292292
<para>
293-
A multicolumn GiST index can only be used when there is a query condition
294-
on its leading column. Conditions on additional columns restrict the
295-
entries returned by the index, but the condition on the first column is the
296-
most important one for determining how much of the index needs to be
297-
scanned. A GiST index will be relatively ineffective if its first column
298-
has only a few distinct values, even if there are many distinct values in
299-
additional columns.
293+
A multicolumn GiST index can be used with query conditions that
294+
involve any subset of the index's columns. Conditions on additional
295+
columns restrict the entries returned by the index, but the condition on
296+
the first column is the most important one for determining how much of
297+
the index needs to be scanned. A GiST index will be relatively
298+
ineffective if its first column has only a few distinct values, even if
299+
there are many distinct values in additional columns.
300300
</para>
301301

302302
<para>

src/backend/access/gist/gist.c

Lines changed: 148 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.136 2006/05/19 16:15:17 teodor Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.137 2006/05/24 11:01:39 teodor Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -181,32 +181,13 @@ gistbuildCallback(Relation index,
181181
{
182182
GISTBuildState *buildstate = (GISTBuildState *) state;
183183
IndexTuple itup;
184-
GISTENTRY tmpcentry;
185-
int i;
186184
MemoryContext oldCtx;
187185

188-
/* GiST cannot index tuples with leading NULLs */
189-
if (isnull[0])
190-
return;
191-
192186
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
193187

194-
/* immediately compress keys to normalize */
195-
for (i = 0; i < buildstate->numindexattrs; i++)
196-
{
197-
if (isnull[i])
198-
values[i] = (Datum) 0;
199-
else
200-
{
201-
gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i],
202-
NULL, NULL, (OffsetNumber) 0,
203-
-1 /* size is currently bogus */ , TRUE, FALSE);
204-
values[i] = tmpcentry.key;
205-
}
206-
}
207-
208188
/* form an index tuple and point it at the heap tuple */
209-
itup = index_form_tuple(buildstate->giststate.tupdesc, values, isnull);
189+
itup = gistFormTuple(&buildstate->giststate, index,
190+
values, NULL /* size is currently bogus */, isnull);
210191
itup->t_tid = htup->t_self;
211192

212193
/*
@@ -243,34 +224,16 @@ gistinsert(PG_FUNCTION_ARGS)
243224
#endif
244225
IndexTuple itup;
245226
GISTSTATE giststate;
246-
GISTENTRY tmpentry;
247-
int i;
248227
MemoryContext oldCtx;
249228
MemoryContext insertCtx;
250229

251-
/* GiST cannot index tuples with leading NULLs */
252-
if (isnull[0])
253-
PG_RETURN_BOOL(false);
254-
255230
insertCtx = createTempGistContext();
256231
oldCtx = MemoryContextSwitchTo(insertCtx);
257232

258233
initGISTstate(&giststate, r);
259234

260-
/* immediately compress keys to normalize */
261-
for (i = 0; i < r->rd_att->natts; i++)
262-
{
263-
if (isnull[i])
264-
values[i] = (Datum) 0;
265-
else
266-
{
267-
gistcentryinit(&giststate, i, &tmpentry, values[i],
268-
NULL, NULL, (OffsetNumber) 0,
269-
-1 /* size is currently bogus */ , TRUE, FALSE);
270-
values[i] = tmpentry.key;
271-
}
272-
}
273-
itup = index_form_tuple(giststate.tupdesc, values, isnull);
235+
itup = gistFormTuple(&giststate, r,
236+
values, NULL /* size is currently bogus */, isnull);
274237
itup->t_tid = *ht_ctid;
275238

276239
gistdoinsert(r, itup, &giststate);
@@ -937,7 +900,147 @@ gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
937900
}
938901

939902
/*
940-
* gistSplit -- split a page in the tree.
903+
* simple split page
904+
*/
905+
static void
906+
gistSplitHalf(GIST_SPLITVEC *v, int len) {
907+
int i;
908+
909+
v->spl_nright = v->spl_nleft = 0;
910+
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
911+
v->spl_right= (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
912+
for(i = 1; i <= len; i++)
913+
if ( i<len/2 )
914+
v->spl_right[ v->spl_nright++ ] = i;
915+
else
916+
v->spl_left[ v->spl_nleft++ ] = i;
917+
}
918+
919+
/*
920+
* if it was invalid tuple then we need special processing.
921+
* We move all invalid tuples on right page.
922+
*
923+
* if there is no place on left page, gistSplit will be called one more
924+
* time for left page.
925+
*
926+
* Normally, we never exec this code, but after crash replay it's possible
927+
* to get 'invalid' tuples (probability is low enough)
928+
*/
929+
static void
930+
gistSplitByInvalid(GISTSTATE *giststate, GIST_SPLITVEC *v, IndexTuple *itup, int len) {
931+
int i;
932+
static OffsetNumber offInvTuples[ MaxOffsetNumber ];
933+
int nOffInvTuples = 0;
934+
935+
for (i = 1; i <= len; i++)
936+
if ( GistTupleIsInvalid(itup[i - 1]) )
937+
offInvTuples[ nOffInvTuples++ ] = i;
938+
939+
if ( nOffInvTuples == len ) {
940+
/* corner case, all tuples are invalid */
941+
v->spl_rightvalid= v->spl_leftvalid = false;
942+
gistSplitHalf( v, len );
943+
} else {
944+
GistSplitVec gsvp;
945+
946+
v->spl_right = offInvTuples;
947+
v->spl_nright = nOffInvTuples;
948+
v->spl_rightvalid = false;
949+
950+
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
951+
v->spl_nleft = 0;
952+
for(i = 1; i <= len; i++)
953+
if ( !GistTupleIsInvalid(itup[i - 1]) )
954+
v->spl_left[ v->spl_nleft++ ] = i;
955+
v->spl_leftvalid = true;
956+
957+
gsvp.idgrp = NULL;
958+
gsvp.attrsize = v->spl_lattrsize;
959+
gsvp.attr = v->spl_lattr;
960+
gsvp.len = v->spl_nleft;
961+
gsvp.entries = v->spl_left;
962+
gsvp.isnull = v->spl_lisnull;
963+
964+
gistunionsubkeyvec(giststate, itup, &gsvp, 0);
965+
}
966+
}
967+
968+
/*
969+
* trys to split page by attno key, in a case of null
970+
* values move its to separate page.
971+
*/
972+
static void
973+
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
974+
GIST_SPLITVEC *v, GistEntryVector *entryvec, int attno) {
975+
int i;
976+
static OffsetNumber offNullTuples[ MaxOffsetNumber ];
977+
int nOffNullTuples = 0;
978+
979+
980+
for (i = 1; i <= len; i++) {
981+
Datum datum;
982+
bool IsNull;
983+
984+
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1])) {
985+
gistSplitByInvalid(giststate, v, itup, len);
986+
return;
987+
}
988+
989+
datum = index_getattr(itup[i - 1], attno+1, giststate->tupdesc, &IsNull);
990+
gistdentryinit(giststate, attno, &(entryvec->vector[i]),
991+
datum, r, page, i,
992+
ATTSIZE(datum, giststate->tupdesc, attno+1, IsNull),
993+
FALSE, IsNull);
994+
if ( IsNull )
995+
offNullTuples[ nOffNullTuples++ ] = i;
996+
}
997+
998+
v->spl_leftvalid = v->spl_rightvalid = true;
999+
1000+
if ( nOffNullTuples == len ) {
1001+
/*
1002+
* Corner case: All keys in attno column are null, we should try to
1003+
* by keys in next column. It all keys in all columns
1004+
* are NULL just split page half by half
1005+
*/
1006+
v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
1007+
if ( attno+1 == r->rd_att->natts )
1008+
gistSplitHalf( v, len );
1009+
else
1010+
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
1011+
} else if ( nOffNullTuples > 0 ) {
1012+
int j=0;
1013+
1014+
/*
1015+
* We don't want to mix NULLs and not-NULLs keys
1016+
* on one page, so move nulls to right page
1017+
*/
1018+
v->spl_right = offNullTuples;
1019+
v->spl_nright = nOffNullTuples;
1020+
v->spl_risnull[attno] = TRUE;
1021+
1022+
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
1023+
v->spl_nleft = 0;
1024+
for(i = 1; i <= len; i++)
1025+
if ( j<v->spl_nright && offNullTuples[j] == i )
1026+
j++;
1027+
else
1028+
v->spl_left[ v->spl_nleft++ ] = i;
1029+
1030+
v->spl_idgrp = NULL;
1031+
gistunionsubkey(giststate, itup, v, 0);
1032+
} else {
1033+
/*
1034+
* all keys are not-null
1035+
*/
1036+
gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate);
1037+
}
1038+
}
1039+
1040+
/*
1041+
* gistSplit -- split a page in the tree and fill the struct
1042+
* used for XLOG and for the real buffer writes. The function is recursive, i.e.
1043+
* it keeps splitting until the keys fit on every page.
9411044
*/
9421045
SplitedPageLayout *
9431046
gistSplit(Relation r,
@@ -951,77 +1054,14 @@ gistSplit(Relation r,
9511054
GIST_SPLITVEC v;
9521055
GistEntryVector *entryvec;
9531056
int i;
954-
OffsetNumber offInvTuples[ MaxOffsetNumber ];
955-
int nOffInvTuples = 0;
9561057
SplitedPageLayout *res = NULL;
9571058

9581059
/* generate the item array */
9591060
entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
9601061
entryvec->n = len + 1;
9611062

962-
for (i = 1; i <= len; i++)
963-
{
964-
Datum datum;
965-
bool IsNull;
966-
967-
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
968-
/* remember position of invalid tuple */
969-
offInvTuples[ nOffInvTuples++ ] = i;
970-
971-
if ( nOffInvTuples > 0 )
972-
/* we can safely do not decompress other keys, because
973-
we will do splecial processing, but
974-
it's needed to find another invalid tuples */
975-
continue;
976-
977-
datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
978-
gistdentryinit(giststate, 0, &(entryvec->vector[i]),
979-
datum, r, page, i,
980-
ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
981-
FALSE, IsNull);
982-
}
983-
984-
/*
985-
* if it was invalid tuple then we need special processing.
986-
* We move all invalid tuples on right page.
987-
*
988-
* if there is no place on left page, gistSplit will be called one more
989-
* time for left page.
990-
*
991-
* Normally, we never exec this code, but after crash replay it's possible
992-
* to get 'invalid' tuples (probability is low enough)
993-
*/
994-
if (nOffInvTuples > 0)
995-
{
996-
GistSplitVec gsvp;
997-
998-
v.spl_right = offInvTuples;
999-
v.spl_nright = nOffInvTuples;
1000-
v.spl_rightvalid = false;
1001-
1002-
v.spl_left = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
1003-
v.spl_nleft = 0;
1004-
for(i = 1; i <= len; i++)
1005-
if ( !GistTupleIsInvalid(itup[i - 1]) )
1006-
v.spl_left[ v.spl_nleft++ ] = i;
1007-
v.spl_leftvalid = true;
1008-
1009-
gsvp.idgrp = NULL;
1010-
gsvp.attrsize = v.spl_lattrsize;
1011-
gsvp.attr = v.spl_lattr;
1012-
gsvp.len = v.spl_nleft;
1013-
gsvp.entries = v.spl_left;
1014-
gsvp.isnull = v.spl_lisnull;
1015-
1016-
gistunionsubkeyvec(giststate, itup, &gsvp, true);
1017-
}
1018-
else
1019-
{
1020-
/* there is no invalid tuples, so usial processing */
1021-
gistUserPicksplit(r, entryvec, &v, itup, len, giststate);
1022-
v.spl_leftvalid = v.spl_rightvalid = true;
1023-
}
1024-
1063+
gistSplitByKey(r, page, itup, len, giststate,
1064+
&v, entryvec, 0);
10251065

10261066
/* form left and right vector */
10271067
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));

src/backend/access/gist/gistget.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.56 2006/03/05 15:58:20 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.57 2006/05/24 11:01:39 teodor Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -361,7 +361,7 @@ gistindex_keytest(IndexTuple tuple,
361361
IncrIndexProcessed();
362362

363363
/*
364-
* Tuple doesn't restore after crash recovery because of inclomplete
364+
* Tuple doesn't restore after crash recovery because of incomplete
365365
* insert
366366
*/
367367
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
@@ -378,14 +378,15 @@ gistindex_keytest(IndexTuple tuple,
378378
key->sk_attno,
379379
giststate->tupdesc,
380380
&isNull);
381-
/* is the index entry NULL? */
382-
if (isNull)
383-
{
384-
/* XXX eventually should check if SK_ISNULL */
381+
382+
if ( key->sk_flags & SK_ISNULL ) {
383+
/* is the compared-to datum NULL? on non-leaf page it's possible
384+
to have nulls in childs :( */
385+
386+
if ( isNull || !GistPageIsLeaf(p) )
387+
return true;
385388
return false;
386-
}
387-
/* is the compared-to datum NULL? */
388-
if (key->sk_flags & SK_ISNULL)
389+
} else if ( isNull )
389390
return false;
390391

391392
gistdentryinit(giststate, key->sk_attno - 1, &de,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy