Skip to content

Commit e3a33a9

Browse files
committed
Marginal hack to avoid spending a lot of time in find_join_rel during
large planning problems: when the list of join rels gets too long, make an auxiliary hash table that hashes on the identifying Bitmapset.
1 parent 77c168a commit e3a33a9

File tree

9 files changed

+194
-25
lines changed

9 files changed

+194
-25
lines changed

src/backend/nodes/bitmapset.c

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.7 2005/01/01 20:44:15 tgl Exp $
17+
* $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.8 2005/06/08 23:02:04 tgl Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -763,3 +763,28 @@ bms_first_member(Bitmapset *a)
763763
}
764764
return -1;
765765
}
766+
767+
/*
768+
* bms_hash_value - compute a hash key for a Bitmapset
769+
*
770+
* Note: we must ensure that any two bitmapsets that are bms_equal() will
771+
* hash to the same value; in practice this means that trailing all-zero
772+
* words cannot affect the result. Longitudinal XOR provides a reasonable
773+
* hash value that has this property.
774+
*/
775+
uint32
776+
bms_hash_value(const Bitmapset *a)
777+
{
778+
bitmapword result = 0;
779+
int nwords;
780+
int wordnum;
781+
782+
if (a == NULL)
783+
return 0; /* All empty sets hash to 0 */
784+
nwords = a->nwords;
785+
for (wordnum = 0; wordnum < nwords; wordnum++)
786+
{
787+
result ^= a->words[wordnum];
788+
}
789+
return (uint32) result;
790+
}

src/backend/optimizer/geqo/geqo_eval.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.74 2005/06/05 22:32:55 tgl Exp $
9+
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.75 2005/06/08 23:02:04 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -47,7 +47,8 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
4747
MemoryContext oldcxt;
4848
RelOptInfo *joinrel;
4949
Cost fitness;
50-
List *savelist;
50+
int savelength;
51+
struct HTAB *savehash;
5152

5253
/*
5354
* Because gimme_tree considers both left- and right-sided trees,
@@ -83,13 +84,19 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
8384
* gimme_tree will add entries to root->join_rel_list, which may or may
8485
* not already contain some entries. The newly added entries will be
8586
* recycled by the MemoryContextDelete below, so we must ensure that
86-
* the list is restored to its former state before exiting. With the
87-
* new List implementation, the easiest way is to make a duplicate list
88-
* that gimme_tree can modify.
87+
* the list is restored to its former state before exiting. We can
88+
* do this by truncating the list to its original length. NOTE this
89+
* assumes that any added entries are appended at the end!
90+
*
91+
* We also must take care not to mess up the outer join_rel_hash,
92+
* if there is one. We can do this by just temporarily setting the
93+
* link to NULL. (If we are dealing with enough join rels, which we
94+
* very likely are, a new hash table will get built and used locally.)
8995
*/
90-
savelist = evaldata->root->join_rel_list;
96+
savelength = list_length(evaldata->root->join_rel_list);
97+
savehash = evaldata->root->join_rel_hash;
9198

92-
evaldata->root->join_rel_list = list_copy(savelist);
99+
evaldata->root->join_rel_hash = NULL;
93100

94101
/* construct the best path for the given combination of relations */
95102
joinrel = gimme_tree(tour, num_gene, evaldata);
@@ -105,8 +112,13 @@ geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
105112
else
106113
fitness = DBL_MAX;
107114

108-
/* restore join_rel_list */
109-
evaldata->root->join_rel_list = savelist;
115+
/*
116+
* Restore join_rel_list to its former state, and put back original
117+
* hashtable if any.
118+
*/
119+
evaldata->root->join_rel_list = list_truncate(evaldata->root->join_rel_list,
120+
savelength);
121+
evaldata->root->join_rel_hash = savehash;
110122

111123
/* release all the memory acquired within gimme_tree */
112124
MemoryContextSwitchTo(oldcxt);

src/backend/optimizer/geqo/geqo_main.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.49 2005/06/05 22:32:55 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_main.c,v 1.50 2005/06/08 23:02:04 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -252,7 +252,6 @@ geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
252252
*/
253253
best_tour = (Gene *) pool->data[0].string;
254254

255-
/* root->join_rel_list will be modified during this ! */
256255
best_rel = gimme_tree(best_tour, pool->string_length, &evaldata);
257256

258257
if (best_rel == NULL)

src/backend/optimizer/plan/planmain.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.83 2005/06/06 04:13:35 tgl Exp $
17+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.84 2005/06/08 23:02:04 tgl Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -116,6 +116,7 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
116116
root->base_rel_array = (RelOptInfo **)
117117
palloc0(root->base_rel_array_size * sizeof(RelOptInfo *));
118118
root->join_rel_list = NIL;
119+
root->join_rel_hash = NULL;
119120
root->equi_key_list = NIL;
120121

121122
/*

src/backend/optimizer/util/relnode.c

Lines changed: 101 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.68 2005/06/06 04:13:36 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.69 2005/06/08 23:02:05 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -21,8 +21,15 @@
2121
#include "optimizer/restrictinfo.h"
2222
#include "optimizer/tlist.h"
2323
#include "parser/parsetree.h"
24+
#include "utils/hsearch.h"
2425

2526

27+
typedef struct JoinHashEntry
28+
{
29+
Relids join_relids; /* hash key --- MUST BE FIRST */
30+
RelOptInfo *join_rel;
31+
} JoinHashEntry;
32+
2633
static RelOptInfo *make_reloptinfo(PlannerInfo *root, int relid,
2734
RelOptKind reloptkind);
2835
static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
@@ -197,6 +204,47 @@ find_base_rel(PlannerInfo *root, int relid)
197204
return NULL; /* keep compiler quiet */
198205
}
199206

207+
/*
208+
* build_join_rel_hash
209+
* Construct the auxiliary hash table for join relations.
210+
*/
211+
static void
212+
build_join_rel_hash(PlannerInfo *root)
213+
{
214+
HTAB *hashtab;
215+
HASHCTL hash_ctl;
216+
ListCell *l;
217+
218+
/* Create the hash table */
219+
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
220+
hash_ctl.keysize = sizeof(Relids);
221+
hash_ctl.entrysize = sizeof(JoinHashEntry);
222+
hash_ctl.hash = bitmap_hash;
223+
hash_ctl.match = bitmap_match;
224+
hash_ctl.hcxt = CurrentMemoryContext;
225+
hashtab = hash_create("JoinRelHashTable",
226+
256L,
227+
&hash_ctl,
228+
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
229+
230+
/* Insert all the already-existing joinrels */
231+
foreach(l, root->join_rel_list)
232+
{
233+
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
234+
JoinHashEntry *hentry;
235+
bool found;
236+
237+
hentry = (JoinHashEntry *) hash_search(hashtab,
238+
&(rel->relids),
239+
HASH_ENTER,
240+
&found);
241+
Assert(!found);
242+
hentry->join_rel = rel;
243+
}
244+
245+
root->join_rel_hash = hashtab;
246+
}
247+
200248
/*
201249
* find_join_rel
202250
* Returns relation entry corresponding to 'relids' (a set of RT indexes),
@@ -205,14 +253,44 @@ find_base_rel(PlannerInfo *root, int relid)
205253
RelOptInfo *
206254
find_join_rel(PlannerInfo *root, Relids relids)
207255
{
208-
ListCell *l;
256+
/*
257+
* Switch to using hash lookup when list grows "too long". The threshold
258+
* is arbitrary and is known only here.
259+
*/
260+
if (!root->join_rel_hash && list_length(root->join_rel_list) > 32)
261+
build_join_rel_hash(root);
209262

210-
foreach(l, root->join_rel_list)
263+
/*
264+
* Use either hashtable lookup or linear search, as appropriate.
265+
*
266+
* Note: the seemingly redundant hashkey variable is used to avoid
267+
* taking the address of relids; unless the compiler is exceedingly
268+
* smart, doing so would force relids out of a register and thus
269+
* probably slow down the list-search case.
270+
*/
271+
if (root->join_rel_hash)
211272
{
212-
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
273+
Relids hashkey = relids;
274+
JoinHashEntry *hentry;
275+
276+
hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
277+
&hashkey,
278+
HASH_FIND,
279+
NULL);
280+
if (hentry)
281+
return hentry->join_rel;
282+
}
283+
else
284+
{
285+
ListCell *l;
213286

214-
if (bms_equal(rel->relids, relids))
215-
return rel;
287+
foreach(l, root->join_rel_list)
288+
{
289+
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
290+
291+
if (bms_equal(rel->relids, relids))
292+
return rel;
293+
}
216294
}
217295

218296
return NULL;
@@ -329,9 +407,24 @@ build_join_rel(PlannerInfo *root,
329407
jointype, restrictlist);
330408

331409
/*
332-
* Add the joinrel to the query's joinrel list.
410+
* Add the joinrel to the query's joinrel list, and store it into
411+
* the auxiliary hashtable if there is one. NB: GEQO requires us
412+
* to append the new joinrel to the end of the list!
333413
*/
334-
root->join_rel_list = lcons(joinrel, root->join_rel_list);
414+
root->join_rel_list = lappend(root->join_rel_list, joinrel);
415+
416+
if (root->join_rel_hash)
417+
{
418+
JoinHashEntry *hentry;
419+
bool found;
420+
421+
hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
422+
&(joinrel->relids),
423+
HASH_ENTER,
424+
&found);
425+
Assert(!found);
426+
hentry->join_rel = joinrel;
427+
}
335428

336429
return joinrel;
337430
}

src/backend/utils/hash/hashfn.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.23 2005/04/14 20:32:43 tgl Exp $
12+
* $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.24 2005/06/08 23:02:05 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
1616
#include "postgres.h"
1717

1818
#include "access/hash.h"
19+
#include "nodes/bitmapset.h"
1920
#include "utils/hsearch.h"
2021

2122

@@ -53,3 +54,26 @@ oid_hash(const void *key, Size keysize)
5354
/* We don't actually bother to do anything to the OID value ... */
5455
return (uint32) *((const Oid *) key);
5556
}
57+
58+
/*
59+
* bitmap_hash: hash function for keys that are (pointers to) Bitmapsets
60+
*
61+
* Note: don't forget to specify bitmap_match as the match function!
62+
*/
63+
uint32
64+
bitmap_hash(const void *key, Size keysize)
65+
{
66+
Assert(keysize == sizeof(Bitmapset *));
67+
return bms_hash_value(*((const Bitmapset * const *) key));
68+
}
69+
70+
/*
71+
* bitmap_match: match function to use with bitmap_hash
72+
*/
73+
int
74+
bitmap_match(const void *key1, const void *key2, Size keysize)
75+
{
76+
Assert(keysize == sizeof(Bitmapset *));
77+
return !bms_equal(*((const Bitmapset * const *) key1),
78+
*((const Bitmapset * const *) key2));
79+
}

src/include/nodes/bitmapset.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*
1414
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
1515
*
16-
* $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.6 2005/01/01 20:44:28 tgl Exp $
16+
* $PostgreSQL: pgsql/src/include/nodes/bitmapset.h,v 1.7 2005/06/08 23:02:05 tgl Exp $
1717
*
1818
*-------------------------------------------------------------------------
1919
*/
@@ -80,4 +80,7 @@ extern Bitmapset *bms_join(Bitmapset *a, Bitmapset *b);
8080
/* support for iterating through the integer elements of a set: */
8181
extern int bms_first_member(Bitmapset *a);
8282

83+
/* support for hashtables using Bitmapsets as keys: */
84+
extern uint32 bms_hash_value(const Bitmapset *a);
85+
8386
#endif /* BITMAPSET_H */

src/include/nodes/relation.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.111 2005/06/06 04:13:36 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.112 2005/06/08 23:02:05 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -72,7 +72,17 @@ typedef struct PlannerInfo
7272
struct RelOptInfo **base_rel_array; /* All one-relation RelOptInfos */
7373
int base_rel_array_size; /* current allocated array len */
7474

75+
/*
76+
* join_rel_list is a list of all join-relation RelOptInfos we have
77+
* considered in this planning run. For small problems we just scan
78+
* the list to do lookups, but when there are many join relations we
79+
* build a hash table for faster lookups. The hash table is present
80+
* and valid when join_rel_hash is not NULL. Note that we still maintain
81+
* the list even when using the hash table for lookups; this simplifies
82+
* life for GEQO.
83+
*/
7584
List *join_rel_list; /* list of join-relation RelOptInfos */
85+
struct HTAB *join_rel_hash; /* optional hashtable for join relations */
7686

7787
List *equi_key_list; /* list of lists of equijoined
7888
* PathKeyItems */

src/include/utils/hsearch.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.36 2005/05/29 04:23:06 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/utils/hsearch.h,v 1.37 2005/06/08 23:02:05 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -184,5 +184,7 @@ extern long hash_select_dirsize(long num_entries);
184184
extern uint32 string_hash(const void *key, Size keysize);
185185
extern uint32 tag_hash(const void *key, Size keysize);
186186
extern uint32 oid_hash(const void *key, Size keysize);
187+
extern uint32 bitmap_hash(const void *key, Size keysize);
188+
extern int bitmap_match(const void *key1, const void *key2, Size keysize);
187189

188190
#endif /* HSEARCH_H */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy