Commit 39673ca

Rewrite hashbulkdelete() to make it amenable to new bucket locking
scheme. A pleasant side effect is that it is *much* faster when deleting a large fraction of the indexed tuples, because of elimination of redundant hash_step activity induced by hash_adjscans. Various other continuing code cleanup.
1 parent 5f65345 · commit 39673ca

6 files changed: +231 additions, -76 deletions
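The gist of the rewrite, before diving into the hunks: instead of opening a generic index scan over the whole index and calling _hash_adjscans() for each deleted tuple (the source of the redundant hash_step activity the commit message mentions), hashbulkdelete() now walks each bucket's page chain directly, bucket by bucket. A condensed sketch of the new control flow; all names come from the hash.c diff below, and this is a simplification rather than compilable code:

/* condensed from the hash.c hunk below -- a sketch, not compilable code */
read the metapage; remember orig_maxbucket and orig_ntuples;
cache a private copy (local_metapage) for BUCKET_TO_BLKNO arithmetic;

loop_top:
for (cur_bucket = 0; cur_bucket <= cur_maxbucket; cur_bucket++)
{
    /* XXX lock bucket here (hook for the coming locking scheme) */
    for (each page in the bucket's chain)
        for (each tuple on the page)
            if (callback(&tuple->t_tid, callback_state))
                PageIndexTupleDelete(page, offno);      /* and count it */
    if (bucket_dirty)
        _hash_squeezebucket(rel, cur_bucket, bucket_blkno);
    /* XXX unlock bucket here */
}

re-read the metapage with a write lock;
if (a split added buckets since we started)
    goto loop_top;                  /* scan the new buckets too */
update hashm_ntuples: exact count if nothing changed concurrently,
otherwise dead-reckon by subtracting deleted_tuples;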

src/backend/access/hash/hash.c

Lines changed: 158 additions & 20 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.65 2003/08/04 02:39:57 momjian Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.66 2003/09/02 02:18:38 tgl Exp $
  *
  * NOTES
  *    This file contains only the public interface routines.
@@ -449,40 +449,178 @@ hashbulkdelete(PG_FUNCTION_ARGS)
     BlockNumber num_pages;
     double      tuples_removed;
     double      num_index_tuples;
-    IndexScanDesc iscan;
+    uint32      deleted_tuples;
+    uint32      tuples_remaining;
+    uint32      orig_ntuples;
+    Bucket      orig_maxbucket;
+    Bucket      cur_maxbucket;
+    Bucket      cur_bucket;
+    Buffer      metabuf;
+    HashMetaPage metap;
+    HashMetaPageData local_metapage;
 
+    /*
+     * keep track of counts in both float form (to return) and integer form
+     * (to update hashm_ntuples).  It'd be better to make hashm_ntuples a
+     * double, but that will have to wait for an initdb.
+     */
     tuples_removed = 0;
     num_index_tuples = 0;
+    deleted_tuples = 0;
+    tuples_remaining = 0;
 
     /*
-     * XXX generic implementation --- should be improved!
+     * Read the metapage to fetch original bucket and tuple counts.  Also,
+     * we keep a copy of the last-seen metapage so that we can use its
+     * hashm_spares[] values to compute bucket page addresses.  This is a
+     * bit hokey but perfectly safe, since the interesting entries in the
+     * spares array cannot change under us; and it beats rereading the
+     * metapage for each bucket.
      */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+    orig_maxbucket = metap->hashm_maxbucket;
+    orig_ntuples = metap->hashm_ntuples;
+    memcpy(&local_metapage, metap, sizeof(local_metapage));
+    _hash_relbuf(rel, metabuf, HASH_READ);
+
+    /* Scan the buckets that we know exist */
+    cur_bucket = 0;
+    cur_maxbucket = orig_maxbucket;
+
+loop_top:
+    while (cur_bucket <= cur_maxbucket)
+    {
+        BlockNumber bucket_blkno;
+        BlockNumber blkno;
+        bool        bucket_dirty = false;
 
-    /* walk through the entire index */
-    iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL);
-    /* including killed tuples */
-    iscan->ignore_killed_tuples = false;
+        /* Get address of bucket's start page */
+        bucket_blkno = BUCKET_TO_BLKNO(&local_metapage, cur_bucket);
 
-    while (index_getnext_indexitem(iscan, ForwardScanDirection))
-    {
-        if (callback(&iscan->xs_ctup.t_self, callback_state))
+        /* XXX lock bucket here */
+
+        /* Scan each page in bucket */
+        blkno = bucket_blkno;
+        while (BlockNumberIsValid(blkno))
         {
-            ItemPointerData indextup = iscan->currentItemData;
+            Buffer      buf;
+            Page        page;
+            HashPageOpaque opaque;
+            OffsetNumber offno;
+            OffsetNumber maxoffno;
+            bool        page_dirty = false;
+
+            buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+            page = BufferGetPage(buf);
+            _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+            opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+            Assert(opaque->hasho_bucket == cur_bucket);
+
+            /* Scan each tuple in page */
+            offno = FirstOffsetNumber;
+            maxoffno = PageGetMaxOffsetNumber(page);
+            while (offno <= maxoffno)
+            {
+                HashItem    hitem;
+                ItemPointer htup;
+
+                hitem = (HashItem) PageGetItem(page,
+                                               PageGetItemId(page, offno));
+                htup = &(hitem->hash_itup.t_tid);
+                if (callback(htup, callback_state))
+                {
+                    ItemPointerData indextup;
+
+                    /* adjust any active scans that will be affected */
+                    /* (this should be unnecessary) */
+                    ItemPointerSet(&indextup, blkno, offno);
+                    _hash_adjscans(rel, &indextup);
+
+                    /* delete the item from the page */
+                    PageIndexTupleDelete(page, offno);
+                    bucket_dirty = page_dirty = true;
+
+                    /* don't increment offno, instead decrement maxoffno */
+                    maxoffno = OffsetNumberPrev(maxoffno);
+
+                    tuples_removed += 1;
+                    deleted_tuples += 1;
+                }
+                else
+                {
+                    offno = OffsetNumberNext(offno);
+
+                    num_index_tuples += 1;
+                    tuples_remaining += 1;
+                }
+            }
 
-            /* adjust any active scans that will be affected by deletion */
-            /* (namely, my own scan) */
-            _hash_adjscans(rel, &indextup);
+            /*
+             * Write or free page if needed, advance to next page.  We want
+             * to preserve the invariant that overflow pages are nonempty.
+             */
+            blkno = opaque->hasho_nextblkno;
+
+            if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
+                _hash_freeovflpage(rel, buf);
+            else if (page_dirty)
+                _hash_wrtbuf(rel, buf);
+            else
+                _hash_relbuf(rel, buf, HASH_WRITE);
+        }
 
-            /* delete the data from the page */
-            _hash_pagedel(rel, &indextup);
+        /* If we deleted anything, try to compact free space */
+        if (bucket_dirty)
+            _hash_squeezebucket(rel, cur_bucket, bucket_blkno);
 
-            tuples_removed += 1;
-        }
+        /* XXX unlock bucket here */
+
+        /* Advance to next bucket */
+        cur_bucket++;
+    }
+
+    /* Write-lock metapage and check for split since we started */
+    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+    metap = (HashMetaPage) BufferGetPage(metabuf);
+    _hash_checkpage((Page) metap, LH_META_PAGE);
+
+    if (cur_maxbucket != metap->hashm_maxbucket)
+    {
+        /* There's been a split, so process the additional bucket(s) */
+        cur_maxbucket = metap->hashm_maxbucket;
+        memcpy(&local_metapage, metap, sizeof(local_metapage));
+        _hash_relbuf(rel, metabuf, HASH_WRITE);
+        goto loop_top;
+    }
+
+    /* Okay, we're really done.  Update tuple count in metapage. */
+
+    if (orig_maxbucket == metap->hashm_maxbucket &&
+        orig_ntuples == metap->hashm_ntuples)
+    {
+        /*
+         * No one has split or inserted anything since start of scan,
+         * so believe our count as gospel.
+         */
+        metap->hashm_ntuples = tuples_remaining;
+    }
+    else
+    {
+        /*
+         * Otherwise, our count is untrustworthy since we may have
+         * double-scanned tuples in split buckets.  Proceed by
+         * dead-reckoning.
+         */
+        if (metap->hashm_ntuples > deleted_tuples)
+            metap->hashm_ntuples -= deleted_tuples;
         else
-            num_index_tuples += 1;
+            metap->hashm_ntuples = 0;
+        num_index_tuples = metap->hashm_ntuples;
     }
 
-    index_endscan(iscan);
+    _hash_wrtbuf(rel, metabuf);
 
     /* return statistics */
     num_pages = RelationGetNumberOfBlocks(rel);
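One detail of the page-scan loop above is easy to miss: after PageIndexTupleDelete() removes an item, the remaining items shift down, so the loop stays at the same offset and shrinks the upper bound instead ("don't increment offno, instead decrement maxoffno"). The following self-contained toy program (plain C with hypothetical names, not PostgreSQL code) shows why that pattern visits every surviving element exactly once:

#include <stdio.h>

/* stand-in for the bulk-delete callback: true means "delete this one" */
static int should_delete(int x) { return x % 2 != 0; }

int main(void)
{
    int items[] = {1, 2, 3, 4, 5, 6, 7};
    int n = 7;          /* plays the role of maxoffno */
    int i = 0;          /* plays the role of offno */

    while (i < n)
    {
        if (should_delete(items[i]))
        {
            /* mimic PageIndexTupleDelete: close the gap */
            for (int j = i; j < n - 1; j++)
                items[j] = items[j + 1];
            n--;        /* shrink the bound, don't advance i */
        }
        else
            i++;        /* only advance past survivors */
    }

    for (int k = 0; k < n; k++)
        printf("%d ", items[k]);    /* prints: 2 4 6 */
    printf("\n");
    return 0;
}

Advancing i after a deletion would skip the element that just slid into slot i; that is exactly the bug the offno/maxoffno dance avoids.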

src/backend/access/hash/hashovfl.c

Lines changed: 6 additions & 9 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.38 2003/09/01 20:26:34 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.39 2003/09/02 02:18:38 tgl Exp $
  *
  * NOTES
  *    Overflow pages look like ordinary relation pages.
@@ -444,11 +444,13 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
  *    first page in the bucket chain.  The read page works backward and
  *    the write page works forward; the procedure terminates when the
  *    read page and write page are the same page.
+ *
+ *    Caller must hold exclusive lock on the target bucket.
  */
 void
 _hash_squeezebucket(Relation rel,
-                    HashMetaPage metap,
-                    Bucket bucket)
+                    Bucket bucket,
+                    BlockNumber bucket_blkno)
 {
     Buffer      wbuf;
     Buffer      rbuf = 0;
@@ -466,7 +468,7 @@ _hash_squeezebucket(Relation rel,
     /*
      * start squeezing into the base bucket page.
      */
-    wblkno = BUCKET_TO_BLKNO(bucket);
+    wblkno = bucket_blkno;
     wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
     wpage = BufferGetPage(wbuf);
     _hash_checkpage(wpage, LH_BUCKET_PAGE);
@@ -484,11 +486,6 @@ _hash_squeezebucket(Relation rel,
     /*
      * find the last page in the bucket chain by starting at the base
      * bucket page and working forward.
-     *
-     * XXX if chains tend to be long, we should probably move forward using
-     * HASH_READ and then _hash_chgbufaccess to HASH_WRITE when we reach
-     * the end.  if they are short we probably don't care very much.  if
-     * the hash function is working at all, they had better be short..
      */
     ropaque = wopaque;
     do
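A knock-on effect of dropping the HashMetaPage parameter: the primary page's block number can no longer be computed inside _hash_squeezebucket() without metapage data, so callers now compute it themselves and pass it in. The call sites updated elsewhere in this commit all follow the same pattern (excerpted from the hash.c and hashpage.c hunks on this page):

/* in hashbulkdelete(), using the locally cached metapage copy */
bucket_blkno = _BUCKET_TO_BLKNO(&local_metapage, cur_bucket);
_hash_squeezebucket(rel, cur_bucket, bucket_blkno);

/* in _hash_splitbucket(), using the pinned metapage */
start_oblkno = BUCKET_TO_BLKNO(metap, obucket);
_hash_squeezebucket(rel, obucket, start_oblkno);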

src/backend/access/hash/hashpage.c

Lines changed: 10 additions & 6 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.39 2003/09/01 20:26:34 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.40 2003/09/02 02:18:38 tgl Exp $
  *
  * NOTES
  *    Postgres hash pages look like ordinary relation pages.  The opaque
@@ -143,7 +143,7 @@ _hash_metapinit(Relation rel)
      */
     for (i = 0; i <= 1; i++)
     {
-        buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
+        buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(metap, i), HASH_WRITE);
         pg = BufferGetPage(buf);
         _hash_pageinit(pg, BufferGetPageSize(buf));
         pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
@@ -456,6 +456,8 @@ _hash_splitbucket(Relation rel,
     Buffer      ovflbuf;
     BlockNumber oblkno;
     BlockNumber nblkno;
+    BlockNumber start_oblkno;
+    BlockNumber start_nblkno;
     bool        null;
     Datum       datum;
     HashItem    hitem;
@@ -475,8 +477,10 @@ _hash_splitbucket(Relation rel,
     _hash_checkpage((Page) metap, LH_META_PAGE);
 
     /* get the buffers & pages */
-    oblkno = BUCKET_TO_BLKNO(obucket);
-    nblkno = BUCKET_TO_BLKNO(nbucket);
+    start_oblkno = BUCKET_TO_BLKNO(metap, obucket);
+    start_nblkno = BUCKET_TO_BLKNO(metap, nbucket);
+    oblkno = start_oblkno;
+    nblkno = start_nblkno;
     obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
     nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
     opage = BufferGetPage(obuf);
@@ -571,7 +575,7 @@
          */
         _hash_wrtbuf(rel, obuf);
         _hash_wrtbuf(rel, nbuf);
-        _hash_squeezebucket(rel, metap, obucket);
+        _hash_squeezebucket(rel, obucket, start_oblkno);
         return;
     }
 }
@@ -639,7 +643,7 @@
     if (!BlockNumberIsValid(oblkno))
     {
         _hash_wrtbuf(rel, nbuf);
-        _hash_squeezebucket(rel, metap, obucket);
+        _hash_squeezebucket(rel, obucket, start_oblkno);
        return;
     }
 
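Every BUCKET_TO_BLKNO call in these hunks now takes the metapage as an extra argument. The macro itself lives in hash.h, one of the two changed files not excerpted on this page, so what follows is a reconstruction from memory rather than part of the visible diff. The idea is that hashm_spares[] records how many overflow pages had been allocated by the time of each doubling of the bucket count, which lets the macro map a bucket number to its primary page's block number with pure arithmetic, no extra I/O:

/* Hypothetical sketch of the hash.h counterpart (not shown on this page):
 * bucket B's primary page is offset by the overflow pages allocated before
 * B's "generation", plus 1 for the metapage at block 0. */
#define BUCKET_TO_BLKNO(metap, B) \
    ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)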

src/backend/access/hash/hashsearch.c

Lines changed: 14 additions & 11 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.31 2003/08/04 02:39:57 momjian Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.32 2003/09/02 02:18:38 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,8 +19,10 @@
 
 
 /*
- * _hash_search() -- Finds the page/bucket that the contains the
- *                   scankey and loads it into *bufP.  the buffer has a read lock.
+ * _hash_search() -- Find the bucket that contains the scankey
+ *                   and fetch its primary bucket page into *bufP.
+ *
+ * the buffer has a read lock.
  */
 void
 _hash_search(Relation rel,
@@ -30,22 +32,23 @@ _hash_search(Relation rel,
              HashMetaPage metap)
 {
     BlockNumber blkno;
-    Datum       keyDatum;
     Bucket      bucket;
 
-    if (scankey == (ScanKey) NULL ||
-        (keyDatum = scankey[0].sk_argument) == (Datum) NULL)
+    if (scankey == NULL)
     {
         /*
-         * If the scankey argument is NULL, all tuples will satisfy the
+         * If the scankey is empty, all tuples will satisfy the
          * scan so we start the scan at the first bucket (bucket 0).
          */
         bucket = 0;
     }
     else
-        bucket = _hash_call(rel, metap, keyDatum);
+    {
+        Assert(!(scankey[0].sk_flags & SK_ISNULL));
+        bucket = _hash_call(rel, metap, scankey[0].sk_argument);
+    }
 
-    blkno = BUCKET_TO_BLKNO(bucket);
+    blkno = BUCKET_TO_BLKNO(metap, bucket);
 
     *bufP = _hash_getbuf(rel, blkno, HASH_READ);
 }
@@ -330,7 +333,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
                 if (allbuckets && bucket < metap->hashm_maxbucket)
                 {
                     ++bucket;
-                    blkno = BUCKET_TO_BLKNO(bucket);
+                    blkno = BUCKET_TO_BLKNO(metap, bucket);
                     buf = _hash_getbuf(rel, blkno, HASH_READ);
                     page = BufferGetPage(buf);
                     _hash_checkpage(page, LH_BUCKET_PAGE);
@@ -380,7 +383,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
                 if (allbuckets && bucket > 0)
                 {
                     --bucket;
-                    blkno = BUCKET_TO_BLKNO(bucket);
+                    blkno = BUCKET_TO_BLKNO(metap, bucket);
                     buf = _hash_getbuf(rel, blkno, HASH_READ);
                     page = BufferGetPage(buf);
                     _hash_checkpage(page, LH_BUCKET_PAGE);
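The _hash_search() hunk is a subtle semantic fix as well as a cleanup: the old test compared sk_argument against (Datum) NULL, so a legitimate key whose datum representation happened to be zero would be mistaken for "no key" and send the scan through every bucket. Side by side, with both versions taken verbatim from the hunk above:

/* before: a zero-valued Datum falls into the scan-everything path */
if (scankey == (ScanKey) NULL ||
    (keyDatum = scankey[0].sk_argument) == (Datum) NULL)
    bucket = 0;

/* after: only a truly absent scankey starts at bucket 0 */
if (scankey == NULL)
    bucket = 0;
else
{
    Assert(!(scankey[0].sk_flags & SK_ISNULL));
    bucket = _hash_call(rel, metap, scankey[0].sk_argument);
}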
