Skip to content

Commit 5dc692f

Browse files
committed
Ensure proper alignment of tuples in HashMemoryChunkData buffers.
The previous coding relied (without any documentation) on the data[] member of HashMemoryChunkData being at a MAXALIGN'ed offset. If it was not, the tuples would not be maxaligned either, leading to failures on alignment-picky machines. While there seems to be no live bug on any platform we support, this is clearly pretty fragile: any addition to or rearrangement of the fields in HashMemoryChunkData could break it. Let's remove the hazard by getting rid of the data[] member and instead using pointer arithmetic with an explicitly maxalign'ed offset. Discussion: https://postgr.es/m/14483.1514938129@sss.pgh.pa.us
1 parent 54eff53 commit 5dc692f

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

src/backend/executor/nodeHash.c

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
979979
/* process all tuples stored in this chunk (and then free it) */
980980
while (idx < oldchunks->used)
981981
{
982-
HashJoinTuple hashTuple = (HashJoinTuple) (oldchunks->data + idx);
982+
HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(oldchunks) + idx);
983983
MinimalTuple tuple = HJTUPLE_MINTUPLE(hashTuple);
984984
int hashTupleSize = (HJTUPLE_OVERHEAD + tuple->t_len);
985985
int bucketno;
@@ -1285,7 +1285,7 @@ ExecParallelHashRepartitionFirst(HashJoinTable hashtable)
12851285
/* Repartition all tuples in this chunk. */
12861286
while (idx < chunk->used)
12871287
{
1288-
HashJoinTuple hashTuple = (HashJoinTuple) (chunk->data + idx);
1288+
HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + idx);
12891289
MinimalTuple tuple = HJTUPLE_MINTUPLE(hashTuple);
12901290
HashJoinTuple copyTuple;
12911291
dsa_pointer shared;
@@ -1469,7 +1469,7 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
14691469

14701470
while (idx < chunk->used)
14711471
{
1472-
HashJoinTuple hashTuple = (HashJoinTuple) (chunk->data + idx);
1472+
HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + idx);
14731473
int bucketno;
14741474
int batchno;
14751475

@@ -1552,7 +1552,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
15521552

15531553
while (idx < chunk->used)
15541554
{
1555-
HashJoinTuple hashTuple = (HashJoinTuple) (chunk->data + idx);
1555+
HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + idx);
15561556
dsa_pointer shared = chunk_s + HASH_CHUNK_HEADER_SIZE + idx;
15571557
int bucketno;
15581558
int batchno;
@@ -2651,17 +2651,16 @@ dense_alloc(HashJoinTable hashtable, Size size)
26512651
size = MAXALIGN(size);
26522652

26532653
/*
2654-
* If tuple size is larger than of 1/4 of chunk size, allocate a separate
2655-
* chunk.
2654+
* If tuple size is larger than threshold, allocate a separate chunk.
26562655
*/
26572656
if (size > HASH_CHUNK_THRESHOLD)
26582657
{
26592658
/* allocate new chunk and put it at the beginning of the list */
26602659
newChunk = (HashMemoryChunk) MemoryContextAlloc(hashtable->batchCxt,
2661-
offsetof(HashMemoryChunkData, data) + size);
2660+
HASH_CHUNK_HEADER_SIZE + size);
26622661
newChunk->maxlen = size;
2663-
newChunk->used = 0;
2664-
newChunk->ntuples = 0;
2662+
newChunk->used = size;
2663+
newChunk->ntuples = 1;
26652664

26662665
/*
26672666
* Add this chunk to the list after the first existing chunk, so that
@@ -2678,10 +2677,7 @@ dense_alloc(HashJoinTable hashtable, Size size)
26782677
hashtable->chunks = newChunk;
26792678
}
26802679

2681-
newChunk->used += size;
2682-
newChunk->ntuples += 1;
2683-
2684-
return newChunk->data;
2680+
return HASH_CHUNK_DATA(newChunk);
26852681
}
26862682

26872683
/*
@@ -2693,7 +2689,7 @@ dense_alloc(HashJoinTable hashtable, Size size)
26932689
{
26942690
/* allocate new chunk and put it at the beginning of the list */
26952691
newChunk = (HashMemoryChunk) MemoryContextAlloc(hashtable->batchCxt,
2696-
offsetof(HashMemoryChunkData, data) + HASH_CHUNK_SIZE);
2692+
HASH_CHUNK_HEADER_SIZE + HASH_CHUNK_SIZE);
26972693

26982694
newChunk->maxlen = HASH_CHUNK_SIZE;
26992695
newChunk->used = size;
@@ -2702,11 +2698,11 @@ dense_alloc(HashJoinTable hashtable, Size size)
27022698
newChunk->next.unshared = hashtable->chunks;
27032699
hashtable->chunks = newChunk;
27042700

2705-
return newChunk->data;
2701+
return HASH_CHUNK_DATA(newChunk);
27062702
}
27072703

27082704
/* There is enough space in the current chunk, let's add the tuple */
2709-
ptr = hashtable->chunks->data + hashtable->chunks->used;
2705+
ptr = HASH_CHUNK_DATA(hashtable->chunks) + hashtable->chunks->used;
27102706
hashtable->chunks->used += size;
27112707
hashtable->chunks->ntuples += 1;
27122708

@@ -2751,7 +2747,7 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size,
27512747
chunk_shared = hashtable->current_chunk_shared;
27522748
Assert(chunk == dsa_get_address(hashtable->area, chunk_shared));
27532749
*shared = chunk_shared + HASH_CHUNK_HEADER_SIZE + chunk->used;
2754-
result = (HashJoinTuple) (chunk->data + chunk->used);
2750+
result = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + chunk->used);
27552751
chunk->used += size;
27562752

27572753
Assert(chunk->used <= chunk->maxlen);
@@ -2859,8 +2855,8 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size,
28592855
}
28602856
LWLockRelease(&pstate->lock);
28612857

2862-
Assert(chunk->data == dsa_get_address(hashtable->area, *shared));
2863-
result = (HashJoinTuple) chunk->data;
2858+
Assert(HASH_CHUNK_DATA(chunk) == dsa_get_address(hashtable->area, *shared));
2859+
result = (HashJoinTuple) HASH_CHUNK_DATA(chunk);
28642860

28652861
return result;
28662862
}

src/include/executor/hashjoin.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ typedef struct HashSkewBucket
117117
typedef struct HashMemoryChunkData
118118
{
119119
int ntuples; /* number of tuples stored in this chunk */
120-
size_t maxlen; /* size of the buffer holding the tuples */
120+
size_t maxlen; /* size of the chunk's tuple buffer */
121121
size_t used; /* number of buffer bytes already used */
122122

123123
/* pointer to the next chunk (linked list) */
@@ -127,13 +127,19 @@ typedef struct HashMemoryChunkData
127127
dsa_pointer shared;
128128
} next;
129129

130-
char data[FLEXIBLE_ARRAY_MEMBER]; /* buffer allocated at the end */
130+
/*
131+
* The chunk's tuple buffer starts after the HashMemoryChunkData struct,
132+
* at offset HASH_CHUNK_HEADER_SIZE (which must be maxaligned). Note that
133+
* that offset is not included in "maxlen" or "used".
134+
*/
131135
} HashMemoryChunkData;
132136

133137
typedef struct HashMemoryChunkData *HashMemoryChunk;
134138

135139
#define HASH_CHUNK_SIZE (32 * 1024L)
136-
#define HASH_CHUNK_HEADER_SIZE (offsetof(HashMemoryChunkData, data))
140+
#define HASH_CHUNK_HEADER_SIZE MAXALIGN(sizeof(HashMemoryChunkData))
141+
#define HASH_CHUNK_DATA(hc) (((char *) (hc)) + HASH_CHUNK_HEADER_SIZE)
142+
/* tuples exceeding HASH_CHUNK_THRESHOLD bytes are put in their own chunk */
137143
#define HASH_CHUNK_THRESHOLD (HASH_CHUNK_SIZE / 4)
138144

139145
/*

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy