Skip to content

Commit 2924ca6

Browse files
committed
Tweak dynahash.c to avoid wasting memory space in non-shared hash tables.
palloc() will normally round allocation requests up to the next power of 2, so make dynahash choose allocation sizes that are as close to a power of 2 as possible. Back-patch to 8.1 --- the problem exists further back, but a much larger patch would be needed and it doesn't seem worth taking any risks.
1 parent f5940e7 commit 2924ca6

File tree

2 files changed

+44
-11
lines changed

2 files changed

+44
-11
lines changed

src/backend/utils/hash/dynahash.c

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.67 2006/03/05 15:58:46 momjian Exp $
12+
* $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.68 2006/06/25 18:29:49 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -68,6 +68,7 @@ static bool element_alloc(HTAB *hashp, int nelem);
6868
static bool dir_realloc(HTAB *hashp);
6969
static bool expand_table(HTAB *hashp);
7070
static void hdefault(HTAB *hashp);
71+
static int choose_nelem_alloc(Size entrysize);
7172
static bool init_htab(HTAB *hashp, long nelem);
7273
static void hash_corrupted(HTAB *hashp);
7374

@@ -262,8 +263,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
262263
/*
263264
* For a shared hash table, preallocate the requested number of elements.
264265
* This reduces problems with run-time out-of-shared-memory conditions.
266+
*
267+
* For a non-shared hash table, preallocate the requested number of
268+
* elements if it's less than our chosen nelem_alloc. This avoids
269+
* wasting space if the caller correctly estimates a small table size.
265270
*/
266-
if (flags & HASH_SHARED_MEM)
271+
if ((flags & HASH_SHARED_MEM) ||
272+
nelem < hctl->nelem_alloc)
267273
{
268274
if (!element_alloc(hashp, (int) nelem))
269275
{
@@ -305,6 +311,37 @@ hdefault(HTAB *hashp)
305311
hctl->freeList = NULL;
306312
}
307313

314+
/*
315+
* Given the user-specified entry size, choose nelem_alloc, ie, how many
316+
* elements to add to the hash table when we need more.
317+
*/
318+
static int
319+
choose_nelem_alloc(Size entrysize)
320+
{
321+
int nelem_alloc;
322+
Size elementSize;
323+
Size allocSize;
324+
325+
/* Each element has a HASHELEMENT header plus user data. */
326+
/* NB: this had better match element_alloc() */
327+
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
328+
329+
/*
330+
* The idea here is to choose nelem_alloc at least 32, but round up
331+
* so that the allocation request will be a power of 2 or just less.
332+
* This makes little difference for hash tables in shared memory,
333+
* but for hash tables managed by palloc, the allocation request
334+
* will be rounded up to a power of 2 anyway. If we fail to take
335+
* this into account, we'll waste as much as half the allocated space.
336+
*/
337+
allocSize = 32 * 4; /* assume elementSize at least 8 */
338+
do {
339+
allocSize <<= 1;
340+
nelem_alloc = allocSize / elementSize;
341+
} while (nelem_alloc < 32);
342+
343+
return nelem_alloc;
344+
}
308345

309346
static bool
310347
init_htab(HTAB *hashp, long nelem)
@@ -364,8 +401,7 @@ init_htab(HTAB *hashp, long nelem)
364401
}
365402

366403
/* Choose number of entries to allocate at a time */
367-
hctl->nelem_alloc = (int) Min(nelem, HASHELEMENT_ALLOC_MAX);
368-
hctl->nelem_alloc = Max(hctl->nelem_alloc, 1);
404+
hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
369405

370406
#if HASH_DEBUG
371407
fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n%s%ld\n",
@@ -417,11 +453,10 @@ hash_estimate_size(long num_entries, Size entrysize)
417453
/* segments */
418454
size = add_size(size, mul_size(nSegments,
419455
MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
420-
/* elements --- allocated in groups of up to HASHELEMENT_ALLOC_MAX */
421-
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
422-
elementAllocCnt = Min(num_entries, HASHELEMENT_ALLOC_MAX);
423-
elementAllocCnt = Max(elementAllocCnt, 1);
456+
/* elements --- allocated in groups of choose_nelem_alloc() entries */
457+
elementAllocCnt = choose_nelem_alloc(entrysize);
424458
nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
459+
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
425460
size = add_size(size,
426461
mul_size(nElementAllocs,
427462
mul_size(elementAllocCnt, elementSize)));

src/include/utils/hsearch.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.42 2006/03/05 15:59:07 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.43 2006/06/25 18:29:49 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -156,8 +156,6 @@ typedef struct HASHCTL
156156

157157
/* max_dsize value to indicate expansible directory */
158158
#define NO_MAX_DSIZE (-1)
159-
/* max number of hash elements allocated at once */
160-
#define HASHELEMENT_ALLOC_MAX (32)
161159

162160
/* hash_search operations */
163161
typedef enum

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy