Skip to content

Commit c422b5c

Browse files
committed
Code review for improved-hashing patch. Fix some portability issues
(char != unsigned char, Datum != uint32); make use of new hash code in dynahash hash tables and hash joins.
1 parent 1eb31d1 commit c422b5c

File tree

11 files changed

+114
-190
lines changed

11 files changed

+114
-190
lines changed

src/backend/access/hash/hash.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.55 2002/03/06 20:49:37 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.56 2002/03/09 17:35:35 tgl Exp $
1212
*
1313
* NOTES
1414
* This file contains only the public interface routines.
@@ -164,6 +164,9 @@ hashinsert(PG_FUNCTION_ARGS)
164164
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
165165
char *nulls = (char *) PG_GETARG_POINTER(2);
166166
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
167+
#ifdef NOT_USED
168+
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
169+
#endif
167170

168171
InsertIndexResult res;
169172
HashItem hitem;

src/backend/access/hash/hashfunc.c

Lines changed: 57 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.32 2002/03/06 20:49:38 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.33 2002/03/09 17:35:35 tgl Exp $
1212
*
1313
* NOTES
1414
* These functions are stored in pg_amproc. For each operator class
@@ -58,23 +58,23 @@ hashfloat4(PG_FUNCTION_ARGS)
5858
{
5959
float4 key = PG_GETARG_FLOAT4(0);
6060

61-
return hash_any((char *) &key, sizeof(key));
61+
return hash_any((unsigned char *) &key, sizeof(key));
6262
}
6363

6464
Datum
6565
hashfloat8(PG_FUNCTION_ARGS)
6666
{
6767
float8 key = PG_GETARG_FLOAT8(0);
6868

69-
return hash_any((char *) &key, sizeof(key));
69+
return hash_any((unsigned char *) &key, sizeof(key));
7070
}
7171

7272
Datum
7373
hashoidvector(PG_FUNCTION_ARGS)
7474
{
7575
Oid *key = (Oid *) PG_GETARG_POINTER(0);
7676

77-
return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(Oid));
77+
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(Oid));
7878
}
7979

8080
/*
@@ -87,17 +87,18 @@ hashint2vector(PG_FUNCTION_ARGS)
8787
{
8888
int16 *key = (int16 *) PG_GETARG_POINTER(0);
8989

90-
return hash_any((char *) key, INDEX_MAX_KEYS * sizeof(int16));
90+
return hash_any((unsigned char *) key, INDEX_MAX_KEYS * sizeof(int16));
9191
}
9292

9393
Datum
9494
hashname(PG_FUNCTION_ARGS)
9595
{
9696
char *key = NameStr(*PG_GETARG_NAME(0));
97+
int keylen = strlen(key);
9798

98-
Assert(strlen(key) <= NAMEDATALEN);
99+
Assert(keylen < NAMEDATALEN); /* else it's not truncated correctly */
99100

100-
return hash_any(key, strlen(key));
101+
return hash_any((unsigned char *) key, keylen);
101102
}
102103

103104
/*
@@ -110,21 +111,24 @@ hashvarlena(PG_FUNCTION_ARGS)
110111
struct varlena *key = PG_GETARG_VARLENA_P(0);
111112
Datum result;
112113

113-
result = hash_any(VARDATA(key), VARSIZE(key) - VARHDRSZ);
114+
result = hash_any((unsigned char *) VARDATA(key),
115+
VARSIZE(key) - VARHDRSZ);
114116

115117
/* Avoid leaking memory for toasted inputs */
116118
PG_FREE_IF_COPY(key, 0);
117119

118120
return result;
119121
}
120122

121-
/* This hash function was written by Bob Jenkins
123+
/*
124+
* This hash function was written by Bob Jenkins
122125
* (bob_jenkins@burtleburtle.net), and superficially adapted
123126
* for PostgreSQL by Neil Conway. For more information on this
124-
* hash function, see http://burtleburtle.net/bob/hash/doobs.html
127+
* hash function, see http://burtleburtle.net/bob/hash/doobs.html,
128+
* or Bob's article in Dr. Dobb's Journal, Sept. 1997.
125129
*/
126130

127-
/*
131+
/*----------
128132
* mix -- mix 3 32-bit values reversibly.
129133
* For every delta with one or two bits set, and the deltas of all three
130134
* high bits or all three low bits, whether the original value of a,b,c
@@ -133,6 +137,7 @@ hashvarlena(PG_FUNCTION_ARGS)
133137
* have at least 1/4 probability of changing.
134138
* - If mix() is run forward, every bit of c will change between 1/3 and
135139
* 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
140+
*----------
136141
*/
137142
#define mix(a,b,c) \
138143
{ \
@@ -151,56 +156,52 @@ hashvarlena(PG_FUNCTION_ARGS)
151156
* hash_any() -- hash a variable-length key into a 32-bit value
152157
* k : the key (the unaligned variable-length array of bytes)
153158
* len : the length of the key, counting by bytes
154-
* Returns a 32-bit value. Every bit of the key affects every bit of
159+
*
160+
* Returns a uint32 value. Every bit of the key affects every bit of
155161
* the return value. Every 1-bit and 2-bit delta achieves avalanche.
156162
* About 6*len+35 instructions. The best hash table sizes are powers
157163
* of 2. There is no need to do mod a prime (mod is sooo slow!).
158164
* If you need less than 32 bits, use a bitmask.
159165
*/
160166
Datum
161-
hash_any(register const char *k, register int keylen)
167+
hash_any(register const unsigned char *k, register int keylen)
162168
{
163-
register Datum a,b,c,len;
164-
165-
/* Set up the internal state */
166-
len = keylen;
167-
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
168-
/* Another arbitrary value. If the hash function is called
169-
* multiple times, this could be the previously generated
170-
* hash value; however, the interface currently doesn't allow
171-
* this. AFAIK this isn't a big deal.
172-
*/
173-
c = 3923095;
174-
175-
/* handle most of the key */
176-
while (len >= 12)
177-
{
178-
a += (k[0] +((Datum)k[1]<<8) +((Datum)k[2]<<16) +((Datum)k[3]<<24));
179-
b += (k[4] +((Datum)k[5]<<8) +((Datum)k[6]<<16) +((Datum)k[7]<<24));
180-
c += (k[8] +((Datum)k[9]<<8) +((Datum)k[10]<<16)+((Datum)k[11]<<24));
181-
mix(a,b,c);
182-
k += 12; len -= 12;
183-
}
184-
185-
/* handle the last 11 bytes */
186-
c += keylen;
187-
switch(len) /* all the case statements fall through */
188-
{
189-
case 11: c+=((Datum)k[10]<<24);
190-
case 10: c+=((Datum)k[9]<<16);
191-
case 9 : c+=((Datum)k[8]<<8);
192-
/* the first byte of c is reserved for the length */
193-
case 8 : b+=((Datum)k[7]<<24);
194-
case 7 : b+=((Datum)k[6]<<16);
195-
case 6 : b+=((Datum)k[5]<<8);
196-
case 5 : b+=k[4];
197-
case 4 : a+=((Datum)k[3]<<24);
198-
case 3 : a+=((Datum)k[2]<<16);
199-
case 2 : a+=((Datum)k[1]<<8);
200-
case 1 : a+=k[0];
201-
/* case 0: nothing left to add */
202-
}
203-
mix(a,b,c);
204-
/* report the result */
205-
return c;
169+
register uint32 a,b,c,len;
170+
171+
/* Set up the internal state */
172+
len = keylen;
173+
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
174+
c = 3923095; /* initialize with an arbitrary value */
175+
176+
/* handle most of the key */
177+
while (len >= 12)
178+
{
179+
a += (k[0] +((uint32)k[1]<<8) +((uint32)k[2]<<16) +((uint32)k[3]<<24));
180+
b += (k[4] +((uint32)k[5]<<8) +((uint32)k[6]<<16) +((uint32)k[7]<<24));
181+
c += (k[8] +((uint32)k[9]<<8) +((uint32)k[10]<<16)+((uint32)k[11]<<24));
182+
mix(a,b,c);
183+
k += 12; len -= 12;
184+
}
185+
186+
/* handle the last 11 bytes */
187+
c += keylen;
188+
switch (len) /* all the case statements fall through */
189+
{
190+
case 11: c+=((uint32)k[10]<<24);
191+
case 10: c+=((uint32)k[9]<<16);
192+
case 9 : c+=((uint32)k[8]<<8);
193+
/* the first byte of c is reserved for the length */
194+
case 8 : b+=((uint32)k[7]<<24);
195+
case 7 : b+=((uint32)k[6]<<16);
196+
case 6 : b+=((uint32)k[5]<<8);
197+
case 5 : b+=k[4];
198+
case 4 : a+=((uint32)k[3]<<24);
199+
case 3 : a+=((uint32)k[2]<<16);
200+
case 2 : a+=((uint32)k[1]<<8);
201+
case 1 : a+=k[0];
202+
/* case 0: nothing left to add */
203+
}
204+
mix(a,b,c);
205+
/* report the result */
206+
return UInt32GetDatum(c);
206207
}

src/backend/executor/nodeHash.c

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
*
10-
* $Id: nodeHash.c,v 1.61 2002/03/06 20:49:44 momjian Exp $
10+
* $Id: nodeHash.c,v 1.62 2002/03/09 17:35:35 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -22,6 +22,7 @@
2222
#include <sys/types.h>
2323
#include <math.h>
2424

25+
#include "access/hash.h"
2526
#include "executor/execdebug.h"
2627
#include "executor/nodeHash.h"
2728
#include "executor/nodeHashjoin.h"
@@ -31,7 +32,7 @@
3132
#include "utils/lsyscache.h"
3233

3334

34-
static int hashFunc(Datum key, int len, bool byVal);
35+
static uint32 hashFunc(Datum key, int len, bool byVal);
3536

3637
/* ----------------------------------------------------------------
3738
* ExecHash
@@ -553,7 +554,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
553554
bucketno = hashFunc(keyval,
554555
(int) hashtable->typLen,
555556
hashtable->typByVal)
556-
% hashtable->totalbuckets;
557+
% (uint32) hashtable->totalbuckets;
557558
}
558559

559560
#ifdef HJDEBUG
@@ -624,30 +625,29 @@ ExecScanHashBucket(HashJoinState *hjstate,
624625
/* ----------------------------------------------------------------
625626
* hashFunc
626627
*
627-
* the hash function, copied from Margo
628+
* the hash function for hash joins
628629
*
629630
* XXX this probably ought to be replaced with datatype-specific
630631
* hash functions, such as those already implemented for hash indexes.
631632
* ----------------------------------------------------------------
632633
*/
633-
static int
634+
static uint32
634635
hashFunc(Datum key, int len, bool byVal)
635636
{
636-
unsigned int h = 0;
637+
unsigned char *k;
637638

638639
if (byVal)
639640
{
640641
/*
641-
* If it's a by-value data type, use the 'len' least significant
642-
* bytes of the Datum value. This should do the right thing on
643-
* either bigendian or littleendian hardware --- see the Datum
644-
* access macros in c.h.
642+
* If it's a by-value data type, just hash the whole Datum value.
643+
* This assumes that datatypes narrower than Datum are consistently
644+
* padded (either zero-extended or sign-extended, but not random
645+
* bits) to fill Datum; see the XXXGetDatum macros in postgres.h.
646+
* NOTE: it would not work to do hash_any(&key, len) since this
647+
* would get the wrong bytes on a big-endian machine.
645648
*/
646-
while (len-- > 0)
647-
{
648-
h = (h * PRIME1) ^ (key & 0xFF);
649-
key >>= 8;
650-
}
649+
k = (unsigned char *) &key;
650+
len = sizeof(Datum);
651651
}
652652
else
653653
{
@@ -662,8 +662,6 @@ hashFunc(Datum key, int len, bool byVal)
662662
* freeing the detoasted copy; that happens for free when the
663663
* per-tuple memory context is reset in ExecHashGetBucket.)
664664
*/
665-
unsigned char *k;
666-
667665
if (len < 0)
668666
{
669667
struct varlena *vkey = PG_DETOAST_DATUM(key);
@@ -673,12 +671,9 @@ hashFunc(Datum key, int len, bool byVal)
673671
}
674672
else
675673
k = (unsigned char *) DatumGetPointer(key);
676-
677-
while (len-- > 0)
678-
h = (h * PRIME1) ^ (*k++);
679674
}
680675

681-
return h % PRIME2;
676+
return DatumGetUInt32(hash_any(k, len));
682677
}
683678

684679
/* ----------------------------------------------------------------

src/backend/utils/adt/date.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.64 2001/11/21 05:57:33 thomas Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.65 2002/03/09 17:35:35 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1116,7 +1116,7 @@ timetz_hash(PG_FUNCTION_ARGS)
11161116
* sizeof(TimeTzADT), so that any garbage pad bytes in the structure
11171117
* won't be included in the hash!
11181118
*/
1119-
return hash_any((char *) key, sizeof(double) + sizeof(int4));
1119+
return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
11201120
}
11211121

11221122
Datum

src/backend/utils/adt/mac.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* PostgreSQL type definitions for MAC addresses.
33
*
4-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.21 2001/08/21 21:23:21 tgl Exp $
4+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/mac.c,v 1.22 2002/03/09 17:35:35 tgl Exp $
55
*/
66

77
#include "postgres.h"
@@ -230,7 +230,7 @@ hashmacaddr(PG_FUNCTION_ARGS)
230230
{
231231
macaddr *key = PG_GETARG_MACADDR_P(0);
232232

233-
return hash_any((char *) key, sizeof(macaddr));
233+
return hash_any((unsigned char *) key, sizeof(macaddr));
234234
}
235235

236236
/*

src/backend/utils/adt/timestamp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.64 2002/03/06 06:10:18 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.65 2002/03/09 17:35:36 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1017,7 +1017,7 @@ interval_hash(PG_FUNCTION_ARGS)
10171017
* sizeof(Interval), so that any garbage pad bytes in the structure
10181018
* won't be included in the hash!
10191019
*/
1020-
return hash_any((char *) key, sizeof(double) + sizeof(int4));
1020+
return hash_any((unsigned char *) key, sizeof(double) + sizeof(int4));
10211021
}
10221022

10231023
/* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.

src/backend/utils/adt/varchar.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.87 2001/11/18 12:07:07 ishii Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.88 2002/03/09 17:35:36 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -791,7 +791,7 @@ hashbpchar(PG_FUNCTION_ARGS)
791791
keydata = VARDATA(key);
792792
keylen = bcTruelen(key);
793793

794-
result = hash_any(keydata, keylen);
794+
result = hash_any((unsigned char *) keydata, keylen);
795795

796796
/* Avoid leaking memory for toasted inputs */
797797
PG_FREE_IF_COPY(key, 0);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy