Skip to content

Commit e5db11c

Browse files
committed
Improve coding of gistchoose and gistRelocateBuildBuffersOnSplit.
This is mostly cosmetic, but it does eliminate a speculative portability issue. The previous coding ignored the fact that sum_grow could easily overflow (in fact, it could be summing multiple IEEE float infinities). On a platform where that didn't guarantee to produce a positive result, the code would misbehave. In any case, it was less than readable.
1 parent 5fcb58b commit e5db11c

File tree

2 files changed

+110
-87
lines changed

2 files changed

+110
-87
lines changed

src/backend/access/gist/gistbuildbuffers.c

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -625,18 +625,17 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
625625
}
626626

627627
/*
628-
* Loop through all index tuples on the buffer on the page being split,
629-
* moving them to buffers on the new pages. We try to move each tuple
628+
* Loop through all index tuples in the buffer of the page being split,
629+
* moving them to buffers for the new pages. We try to move each tuple to
630630
* the page that will result in the lowest penalty for the leading column
631631
* or, in the case of a tie, the lowest penalty for the earliest column
632632
* that is not tied.
633633
*
634-
* The guts of this loop are very similar to gistchoose().
634+
* The page searching logic is very similar to gistchoose().
635635
*/
636636
while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
637637
{
638-
float sum_grow,
639-
which_grow[INDEX_MAX_KEYS];
638+
float best_penalty[INDEX_MAX_KEYS];
640639
int i,
641640
which;
642641
IndexTuple newtup;
@@ -645,71 +644,88 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
645644
gistDeCompressAtt(giststate, r,
646645
itup, NULL, (OffsetNumber) 0, entry, isnull);
647646

648-
which = -1;
649-
*which_grow = -1.0f;
647+
/* default to using first page (shouldn't matter) */
648+
which = 0;
650649

651650
/*
652-
* Loop over possible target pages. We'll exit early if we find an index key that
653-
* can accommodate the new key with no penalty on any column. sum_grow is used to
654-
* track this condition. It doesn't need to be exactly accurate, just >0 whenever
655-
* we want the loop to continue and equal to 0 when we want it to terminate.
651+
* best_penalty[j] is the best penalty we have seen so far for column
652+
* j, or -1 when we haven't yet examined column j. Array entries to
653+
* the right of the first -1 are undefined.
656654
*/
657-
sum_grow = 1.0f;
655+
best_penalty[0] = -1;
658656

659-
for (i = 0; i < splitPagesCount && sum_grow; i++)
657+
/*
658+
* Loop over possible target pages, looking for one to move this tuple
659+
* to.
660+
*/
661+
for (i = 0; i < splitPagesCount; i++)
660662
{
661-
int j;
662663
RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
664+
bool zero_penalty;
665+
int j;
663666

664-
sum_grow = 0.0f;
667+
zero_penalty = true;
665668

666669
/* Loop over index attributes. */
667670
for (j = 0; j < r->rd_att->natts; j++)
668671
{
669672
float usize;
670673

674+
/* Compute penalty for this column. */
671675
usize = gistpenalty(giststate, j,
672676
&splitPageInfo->entry[j],
673677
splitPageInfo->isnull[j],
674678
&entry[j], isnull[j]);
679+
if (usize > 0)
680+
zero_penalty = false;
675681

676-
if (which_grow[j] < 0 || usize < which_grow[j])
682+
if (best_penalty[j] < 0 || usize < best_penalty[j])
677683
{
678684
/*
679-
* We get here in two cases. First, we may have just discovered that the
680-
* current tuple is the best one we've seen so far; that is, for the first
681-
* column for which the penalty is not equal to the best tuple seen so far,
682-
* this one has a lower penalty than the previously-seen one. But, when
683-
* a new best tuple is found, we must record the best penalty value for
684-
* all the remaining columns. We'll end up here for each remaining index
685-
* column in that case, too.
685+
* New best penalty for column. Tentatively select this
686+
* page as the target, and record the best penalty. Then
687+
* reset the next column's penalty to "unknown" (and
688+
* indirectly, the same for all the ones to its right).
689+
* This will force us to adopt this page's penalty values
690+
* as the best for all the remaining columns during
691+
* subsequent loop iterations.
686692
*/
687693
which = i;
688-
which_grow[j] = usize;
694+
best_penalty[j] = usize;
695+
689696
if (j < r->rd_att->natts - 1)
690-
which_grow[j + 1] = -1;
691-
sum_grow += which_grow[j];
697+
best_penalty[j + 1] = -1;
692698
}
693-
else if (which_grow[j] == usize)
699+
else if (best_penalty[j] == usize)
694700
{
695701
/*
696-
* The current tuple is exactly as good for this column as the best tuple
697-
* seen so far. The next iteration of this loop will compare the next
698-
* column.
702+
* The current page is exactly as good for this column as
703+
* the best page seen so far. The next iteration of this
704+
* loop will compare the next column.
699705
*/
700-
sum_grow += usize;
701706
}
702707
else
703708
{
704709
/*
705-
* The current tuple is worse for this column than the best tuple seen so
706-
* far. Skip the remaining columns and move on to the next tuple, if any.
710+
* The current page is worse for this column than the best
711+
* page seen so far. Skip the remaining columns and move
712+
* on to the next page, if any.
707713
*/
708-
sum_grow = 1;
714+
zero_penalty = false; /* so outer loop won't exit */
709715
break;
710716
}
711717
}
718+
719+
/*
720+
* If we find a page with zero penalty for all columns, there's no
721+
* need to examine remaining pages; just break out of the loop and
722+
* return it.
723+
*/
724+
if (zero_penalty)
725+
break;
712726
}
727+
728+
/* OK, "which" is the page index to push the tuple to */
713729
targetBufferInfo = &relocationBuffersInfos[which];
714730

715731
/* Push item to selected node buffer */

src/backend/access/gist/gistutil.c

Lines changed: 60 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -363,113 +363,120 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
363363
}
364364

365365
/*
366-
* Search a page for the entry with lowest penalty.
366+
* Search an upper index page for the entry with lowest penalty for insertion
367+
* of the new index key contained in "it".
367368
*
368-
* The index may have multiple columns, and there's a penalty value for each column.
369-
* The penalty associated with a column which appears earlier in the index definition is
370-
* strictly more important than the penalty of column which appears later in the index
371-
* definition.
369+
* Returns the index of the page entry to insert into.
372370
*/
373371
OffsetNumber
374372
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
375373
GISTSTATE *giststate)
376374
{
375+
OffsetNumber result;
377376
OffsetNumber maxoff;
378377
OffsetNumber i;
379-
OffsetNumber which;
380-
float sum_grow,
381-
which_grow[INDEX_MAX_KEYS];
378+
float best_penalty[INDEX_MAX_KEYS];
382379
GISTENTRY entry,
383380
identry[INDEX_MAX_KEYS];
384381
bool isnull[INDEX_MAX_KEYS];
385382

386-
maxoff = PageGetMaxOffsetNumber(p);
387-
*which_grow = -1.0;
388-
which = InvalidOffsetNumber;
389-
sum_grow = 1;
383+
Assert(!GistPageIsLeaf(p));
384+
390385
gistDeCompressAtt(giststate, r,
391386
it, NULL, (OffsetNumber) 0,
392387
identry, isnull);
393388

394-
Assert(maxoff >= FirstOffsetNumber);
395-
Assert(!GistPageIsLeaf(p));
389+
/* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
390+
result = FirstOffsetNumber;
396391

397392
/*
398-
* Loop over tuples on page.
393+
* The index may have multiple columns, and there's a penalty value for
394+
* each column. The penalty associated with a column that appears earlier
395+
* in the index definition is strictly more important than the penalty of
396+
* a column that appears later in the index definition.
399397
*
400-
* We'll exit early if we find an index key that can accommodate the new key with no
401-
* penalty on any column. sum_grow is used to track this condition. Normally, it is the
402-
* sum of the penalties we've seen for this column so far, which is not a very useful
403-
* quantity in general because the penalties for each column are only considered
404-
* independently, but all we really care about is whether or not it's greater than zero.
405-
* Since penalties can't be negative, the sum of the penalties will be greater than
406-
* zero if and only if at least one penalty was greater than zero. To make things just
407-
* a bit more complicated, we arbitrarily set sum_grow to 1.0 whenever we want to force
408-
* the at least one more iteration of this outer loop. Any non-zero value would serve
409-
* just as well.
398+
* best_penalty[j] is the best penalty we have seen so far for column j,
399+
* or -1 when we haven't yet examined column j. Array entries to the
400+
* right of the first -1 are undefined.
410401
*/
411-
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
402+
best_penalty[0] = -1;
403+
404+
/*
405+
* Loop over tuples on page.
406+
*/
407+
maxoff = PageGetMaxOffsetNumber(p);
408+
Assert(maxoff >= FirstOffsetNumber);
409+
410+
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
412411
{
413-
int j;
414412
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
413+
bool zero_penalty;
414+
int j;
415415

416-
sum_grow = 0;
416+
zero_penalty = true;
417417

418-
/* Loop over indexed attribtues. */
418+
/* Loop over index attributes. */
419419
for (j = 0; j < r->rd_att->natts; j++)
420420
{
421421
Datum datum;
422422
float usize;
423423
bool IsNull;
424424

425+
/* Compute penalty for this column. */
425426
datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
426427
gistdentryinit(giststate, j, &entry, datum, r, p, i,
427428
FALSE, IsNull);
428429
usize = gistpenalty(giststate, j, &entry, IsNull,
429430
&identry[j], isnull[j]);
431+
if (usize > 0)
432+
zero_penalty = false;
430433

431-
if (which_grow[j] < 0 || usize < which_grow[j])
434+
if (best_penalty[j] < 0 || usize < best_penalty[j])
432435
{
433436
/*
434-
* We get here in two cases. First, we may have just discovered that the
435-
* current tuple is the best one we've seen so far; that is, for the first
436-
* column for which the penalty is not equal to the best tuple seen so far,
437-
* this one has a lower penalty than the previously-seen one. But, when
438-
* a new best tuple is found, we must record the best penalty value for
439-
* all the remaining columns. We'll end up here for each remaining index
440-
* column in that case, too.
437+
* New best penalty for column. Tentatively select this tuple
438+
* as the target, and record the best penalty. Then reset the
439+
* next column's penalty to "unknown" (and indirectly, the
440+
* same for all the ones to its right). This will force us to
441+
* adopt this tuple's penalty values as the best for all the
442+
* remaining columns during subsequent loop iterations.
441443
*/
442-
which = i;
443-
which_grow[j] = usize;
444+
result = i;
445+
best_penalty[j] = usize;
446+
444447
if (j < r->rd_att->natts - 1)
445-
which_grow[j + 1] = -1;
446-
sum_grow += which_grow[j];
448+
best_penalty[j + 1] = -1;
447449
}
448-
else if (which_grow[j] == usize)
450+
else if (best_penalty[j] == usize)
449451
{
450452
/*
451-
* The current tuple is exactly as good for this column as the best tuple
452-
* seen so far. The next iteration of this loop will compare the next
453-
* column.
453+
* The current tuple is exactly as good for this column as the
454+
* best tuple seen so far. The next iteration of this loop
455+
* will compare the next column.
454456
*/
455-
sum_grow += usize;
456457
}
457458
else
458459
{
459460
/*
460-
* The current tuple is worse for this column than the best tuple seen so
461-
* far. Skip the remaining columns and move on to the next tuple, if any.
461+
* The current tuple is worse for this column than the best
462+
* tuple seen so far. Skip the remaining columns and move on
463+
* to the next tuple, if any.
462464
*/
463-
sum_grow = 1;
465+
zero_penalty = false; /* so outer loop won't exit */
464466
break;
465467
}
466468
}
467-
}
468469

469-
if (which == InvalidOffsetNumber)
470-
which = FirstOffsetNumber;
470+
/*
471+
* If we find a tuple with zero penalty for all columns, there's no
472+
* need to examine remaining tuples; just break out of the loop and
473+
* return it.
474+
*/
475+
if (zero_penalty)
476+
break;
477+
}
471478

472-
return which;
479+
return result;
473480
}
474481

475482
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy