Skip to content

Commit 69d0a15

Browse files
committed
Convert hash join code to use MinimalTuple format in tuple hash table
and batch files. Should reduce memory and I/O demands for such joins.
1 parent 665c5e8 commit 69d0a15

File tree

7 files changed: 121 additions and 72 deletions.

src/backend/executor/execTuples.c

Lines changed: 50 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.95 2006/06/27 02:51:39 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.96 2006/06/27 21:31:20 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -718,6 +718,55 @@ ExecFetchSlotTuple(TupleTableSlot *slot)
718718
return ExecMaterializeSlot(slot);
719719
}
720720

721+
/* --------------------------------
722+
* ExecFetchSlotMinimalTuple
723+
* Fetch the slot's minimal physical tuple.
724+
*
725+
* If the slot contains a virtual tuple, we convert it to minimal
726+
* physical form. The slot retains ownership of the physical tuple.
727+
* Likewise, if it contains a regular tuple we convert to minimal form.
728+
*
729+
* As above, the result must be treated as read-only.
730+
* --------------------------------
731+
*/
732+
MinimalTuple
733+
ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
734+
{
735+
MinimalTuple newTuple;
736+
MemoryContext oldContext;
737+
738+
/*
739+
* sanity checks
740+
*/
741+
Assert(slot != NULL);
742+
Assert(!slot->tts_isempty);
743+
744+
/*
745+
* If we have a minimal physical tuple then just return it.
746+
*/
747+
if (slot->tts_mintuple)
748+
return slot->tts_mintuple;
749+
750+
/*
751+
* Otherwise, build a minimal tuple, and then store it as the new slot
752+
* value. (Note: tts_nvalid will be reset to zero here. There are cases
753+
* in which this could be optimized but it's probably not worth worrying
754+
* about.)
755+
*
756+
* We may be called in a context that is shorter-lived than the tuple
757+
* slot, but we have to ensure that the materialized tuple will survive
758+
* anyway.
759+
*/
760+
oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
761+
newTuple = ExecCopySlotMinimalTuple(slot);
762+
MemoryContextSwitchTo(oldContext);
763+
764+
ExecStoreMinimalTuple(newTuple, slot, true);
765+
766+
Assert(slot->tts_mintuple);
767+
return slot->tts_mintuple;
768+
}
769+
721770
/* --------------------------------
722771
* ExecMaterializeSlot
723772
* Force a slot into the "materialized" state.

src/backend/executor/nodeHash.c

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.104 2006/06/27 21:31:20 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -92,7 +92,7 @@ MultiExecHash(HashState *node)
9292
/* We have to compute the hash value */
9393
econtext->ecxt_innertuple = slot;
9494
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
95-
ExecHashTableInsert(hashtable, ExecFetchSlotTuple(slot), hashvalue);
95+
ExecHashTableInsert(hashtable, slot, hashvalue);
9696
}
9797

9898
/* must provide our own instrumentation support */
@@ -358,8 +358,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
358358
* does not allow for any palloc overhead. The manipulations of spaceUsed
359359
* don't count palloc overhead either.
360360
*/
361-
tupsize = MAXALIGN(sizeof(HashJoinTupleData)) +
362-
MAXALIGN(sizeof(HeapTupleHeaderData)) +
361+
tupsize = HJTUPLE_OVERHEAD +
362+
MAXALIGN(sizeof(MinimalTupleData)) +
363363
MAXALIGN(tupwidth);
364364
inner_rel_bytes = ntuples * tupsize;
365365

@@ -548,7 +548,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
548548
{
549549
/* dump it out */
550550
Assert(batchno > curbatch);
551-
ExecHashJoinSaveTuple(&tuple->htup, tuple->hashvalue,
551+
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
552+
tuple->hashvalue,
552553
&hashtable->innerBatchFile[batchno]);
553554
/* and remove from hash table */
554555
if (prevtuple)
@@ -557,7 +558,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
557558
hashtable->buckets[i] = nexttuple;
558559
/* prevtuple doesn't change */
559560
hashtable->spaceUsed -=
560-
MAXALIGN(sizeof(HashJoinTupleData)) + tuple->htup.t_len;
561+
HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(tuple)->t_len;
561562
pfree(tuple);
562563
nfreed++;
563564
}
@@ -592,12 +593,19 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
592593
* ExecHashTableInsert
593594
* insert a tuple into the hash table depending on the hash value
594595
* it may just go to a temp file for later batches
596+
*
597+
* Note: the passed TupleTableSlot may contain a regular, minimal, or virtual
598+
* tuple; the minimal case in particular is certain to happen while reloading
599+
* tuples from batch files. We could save some cycles in the regular-tuple
600+
* case by not forcing the slot contents into minimal form; not clear if it's
601+
* worth the messiness required.
595602
*/
596603
void
597604
ExecHashTableInsert(HashJoinTable hashtable,
598-
HeapTuple tuple,
605+
TupleTableSlot *slot,
599606
uint32 hashvalue)
600607
{
608+
MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot);
601609
int bucketno;
602610
int batchno;
603611

@@ -615,18 +623,11 @@ ExecHashTableInsert(HashJoinTable hashtable,
615623
HashJoinTuple hashTuple;
616624
int hashTupleSize;
617625

618-
hashTupleSize = MAXALIGN(sizeof(HashJoinTupleData)) + tuple->t_len;
626+
hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
619627
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
620628
hashTupleSize);
621629
hashTuple->hashvalue = hashvalue;
622-
memcpy((char *) &hashTuple->htup,
623-
(char *) tuple,
624-
sizeof(hashTuple->htup));
625-
hashTuple->htup.t_data = (HeapTupleHeader)
626-
(((char *) hashTuple) + MAXALIGN(sizeof(HashJoinTupleData)));
627-
memcpy((char *) hashTuple->htup.t_data,
628-
(char *) tuple->t_data,
629-
tuple->t_len);
630+
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
630631
hashTuple->next = hashtable->buckets[bucketno];
631632
hashtable->buckets[bucketno] = hashTuple;
632633
hashtable->spaceUsed += hashTupleSize;
@@ -639,7 +640,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
639640
* put the tuple into a temp file for later batches
640641
*/
641642
Assert(batchno > hashtable->curbatch);
642-
ExecHashJoinSaveTuple(tuple, hashvalue,
643+
ExecHashJoinSaveTuple(tuple,
644+
hashvalue,
643645
&hashtable->innerBatchFile[batchno]);
644646
}
645647
}
@@ -749,7 +751,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
749751
*
750752
* The current outer tuple must be stored in econtext->ecxt_outertuple.
751753
*/
752-
HeapTuple
754+
HashJoinTuple
753755
ExecScanHashBucket(HashJoinState *hjstate,
754756
ExprContext *econtext)
755757
{
@@ -771,14 +773,12 @@ ExecScanHashBucket(HashJoinState *hjstate,
771773
{
772774
if (hashTuple->hashvalue == hashvalue)
773775
{
774-
HeapTuple heapTuple = &hashTuple->htup;
775776
TupleTableSlot *inntuple;
776777

777778
/* insert hashtable's tuple into exec slot so ExecQual sees it */
778-
inntuple = ExecStoreTuple(heapTuple,
779-
hjstate->hj_HashTupleSlot,
780-
InvalidBuffer,
781-
false); /* do not pfree */
779+
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
780+
hjstate->hj_HashTupleSlot,
781+
false); /* do not pfree */
782782
econtext->ecxt_innertuple = inntuple;
783783

784784
/* reset temp memory each time to avoid leaks from qual expr */
@@ -787,7 +787,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
787787
if (ExecQual(hjclauses, econtext, false))
788788
{
789789
hjstate->hj_CurTuple = hashTuple;
790-
return heapTuple;
790+
return hashTuple;
791791
}
792792
}
793793

src/backend/executor/nodeHashjoin.c

Lines changed: 33 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.82 2006/06/16 18:42:22 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.83 2006/06/27 21:31:20 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -54,7 +54,7 @@ ExecHashJoin(HashJoinState *node)
5454
ExprContext *econtext;
5555
ExprDoneCond isDone;
5656
HashJoinTable hashtable;
57-
HeapTuple curtuple;
57+
HashJoinTuple curtuple;
5858
TupleTableSlot *outerTupleSlot;
5959
uint32 hashvalue;
6060
int batchno;
@@ -224,7 +224,7 @@ ExecHashJoin(HashJoinState *node)
224224
* in the corresponding outer-batch file.
225225
*/
226226
Assert(batchno > hashtable->curbatch);
227-
ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
227+
ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
228228
hashvalue,
229229
&hashtable->outerBatchFile[batchno]);
230230
node->hj_NeedNewOuter = true;
@@ -244,10 +244,9 @@ ExecHashJoin(HashJoinState *node)
244244
/*
245245
* we've got a match, but still need to test non-hashed quals
246246
*/
247-
inntuple = ExecStoreTuple(curtuple,
248-
node->hj_HashTupleSlot,
249-
InvalidBuffer,
250-
false); /* don't pfree this tuple */
247+
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
248+
node->hj_HashTupleSlot,
249+
false); /* don't pfree */
251250
econtext->ecxt_innertuple = inntuple;
252251

253252
/* reset temp memory each time to avoid leaks from qual expr */
@@ -706,9 +705,7 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
706705
* NOTE: some tuples may be sent to future batches. Also, it is
707706
* possible for hashtable->nbatch to be increased here!
708707
*/
709-
ExecHashTableInsert(hashtable,
710-
ExecFetchSlotTuple(slot),
711-
hashvalue);
708+
ExecHashTableInsert(hashtable, slot, hashvalue);
712709
}
713710

714711
/*
@@ -741,15 +738,14 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
741738
* save a tuple to a batch file.
742739
*
743740
* The data recorded in the file for each tuple is its hash value,
744-
* then an image of its HeapTupleData (with meaningless t_data pointer)
745-
* followed by the HeapTupleHeader and tuple data.
741+
* then the tuple in MinimalTuple format.
746742
*
747743
* Note: it is important always to call this in the regular executor
748744
* context, not in a shorter-lived context; else the temp file buffers
749745
* will get messed up.
750746
*/
751747
void
752-
ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
748+
ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
753749
BufFile **fileptr)
754750
{
755751
BufFile *file = *fileptr;
@@ -768,14 +764,8 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
768764
(errcode_for_file_access(),
769765
errmsg("could not write to hash-join temporary file: %m")));
770766

771-
written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
772-
if (written != sizeof(HeapTupleData))
773-
ereport(ERROR,
774-
(errcode_for_file_access(),
775-
errmsg("could not write to hash-join temporary file: %m")));
776-
777-
written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
778-
if (written != (size_t) heapTuple->t_len)
767+
written = BufFileWrite(file, (void *) tuple, tuple->t_len);
768+
if (written != tuple->t_len)
779769
ereport(ERROR,
780770
(errcode_for_file_access(),
781771
errmsg("could not write to hash-join temporary file: %m")));
@@ -794,32 +784,36 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
794784
uint32 *hashvalue,
795785
TupleTableSlot *tupleSlot)
796786
{
797-
HeapTupleData htup;
787+
uint32 header[2];
798788
size_t nread;
799-
HeapTuple heapTuple;
789+
MinimalTuple tuple;
800790

801-
nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32));
802-
if (nread == 0)
803-
return NULL; /* end of file */
804-
if (nread != sizeof(uint32))
805-
ereport(ERROR,
806-
(errcode_for_file_access(),
807-
errmsg("could not read from hash-join temporary file: %m")));
808-
nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
809-
if (nread != sizeof(HeapTupleData))
791+
/*
792+
* Since both the hash value and the MinimalTuple length word are
793+
* uint32, we can read them both in one BufFileRead() call without
794+
* any type cheating.
795+
*/
796+
nread = BufFileRead(file, (void *) header, sizeof(header));
797+
if (nread == 0) /* end of file */
798+
{
799+
ExecClearTuple(tupleSlot);
800+
return NULL;
801+
}
802+
if (nread != sizeof(header))
810803
ereport(ERROR,
811804
(errcode_for_file_access(),
812805
errmsg("could not read from hash-join temporary file: %m")));
813-
heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
814-
memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
815-
heapTuple->t_data = (HeapTupleHeader)
816-
((char *) heapTuple + HEAPTUPLESIZE);
817-
nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
818-
if (nread != (size_t) htup.t_len)
806+
*hashvalue = header[0];
807+
tuple = (MinimalTuple) palloc(header[1]);
808+
tuple->t_len = header[1];
809+
nread = BufFileRead(file,
810+
(void *) ((char *) tuple + sizeof(uint32)),
811+
header[1] - sizeof(uint32));
812+
if (nread != header[1] - sizeof(uint32))
819813
ereport(ERROR,
820814
(errcode_for_file_access(),
821815
errmsg("could not read from hash-join temporary file: %m")));
822-
return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
816+
return ExecStoreMinimalTuple(tuple, tupleSlot, true);
823817
}
824818

825819

src/include/executor/hashjoin.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.38 2006/03/05 15:58:56 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.39 2006/06/27 21:31:20 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -65,9 +65,14 @@ typedef struct HashJoinTupleData
6565
{
6666
struct HashJoinTupleData *next; /* link to next tuple in same bucket */
6767
uint32 hashvalue; /* tuple's hash code */
68-
HeapTupleData htup; /* tuple header */
68+
/* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
6969
} HashJoinTupleData;
7070

71+
#define HJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData))
72+
#define HJTUPLE_MINTUPLE(hjtup) \
73+
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
74+
75+
7176
typedef struct HashJoinTableData
7277
{
7378
int nbuckets; /* # buckets in the in-memory hash table */

src/include/executor/nodeHash.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.40 2006/03/05 15:58:56 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.41 2006/06/27 21:31:20 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -26,7 +26,7 @@ extern void ExecReScanHash(HashState *node, ExprContext *exprCtxt);
2626
extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators);
2727
extern void ExecHashTableDestroy(HashJoinTable hashtable);
2828
extern void ExecHashTableInsert(HashJoinTable hashtable,
29-
HeapTuple tuple,
29+
TupleTableSlot *slot,
3030
uint32 hashvalue);
3131
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
3232
ExprContext *econtext,
@@ -35,7 +35,7 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
3535
uint32 hashvalue,
3636
int *bucketno,
3737
int *batchno);
38-
extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate,
38+
extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate,
3939
ExprContext *econtext);
4040
extern void ExecHashTableReset(HashJoinTable hashtable);
4141
extern void ExecChooseHashTableSize(double ntuples, int tupwidth,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy